In [1]:
import math
import os
import pandas as pd
import sys

In [2]:
# choose input file
input_filename = "jabref_pr_output.csv"

# get project name
# To do this, input file must have format <project_name>_<rest of filename>.csv, e.g. jabref_example.csv
proj_name      = input_filename.split( "_" )[0]
print( "project name: " + proj_name )

# choose directory where all input and output data will be written
data_dir       = "/home/j/git/osl/ETL2-Pipeline/data/"
input_dir      = data_dir + "input/"
output_dir     = data_dir + "output/"
proj_out_dir   = output_dir + proj_name

# create input file path
input_file = input_dir + input_filename

# create project-specific output dirs
os.makedirs( proj_out_dir, exist_ok=True )

# choose dir for intermediate outputs
new_fields_dir  = output_dir + "new_fields/"
proj_fields_dir = new_fields_dir + proj_name

os.makedirs( proj_fields_dir, exist_ok=True )

project name: jabref


In [3]:
# for the sake of consistency, we want to make sure that the projects are consistently named. Use all lowercase, e.g. presto instead of Presto
mergeNA_output_filename = output_dir + proj_name + "_df_mergeNA.csv"
print( mergeNA_output_filename )

/home/j/git/osl/ETL2-Pipeline/data/output/jabref_df_mergeNA.csv


In [4]:
merge_output_filename = output_dir + proj_name + "_df_merge.csv"
print( merge_output_filename )

/home/j/git/osl/ETL2-Pipeline/data/output/jabref_df_merge.csv


In [5]:
# read out the contents of the input file. Change separator if necessary
prIssues = pd.read_csv(input_file, header = 0, sep="\a")

In [6]:
pd.options.display.max_seq_items = 2000
pd.options.display.max_colwidth = 90
pd.options.display.max_rows = 999

In [7]:
prIssues.head(10)

Unnamed: 0,Issue_Number,Issue_Title,Issue_Author_Name,Issue_Author_Login,Issue_Closed_Date,Issue_Body,Issue_Comments,PR_Title,PR_Author_Name,PR_Author_Login,PR_Closed_Date,PR_Body,PR_Comments,Commit_Author_Name,Commit_Date,Commit_Message,isPR
0,1,New Sorting/Export preferences,Olaf Lenz,olenz,"03/12/14, 11:38:01 AM","This will add a new ""File Sorting"" Tab to the preferences that allows to exactly speci...",,New Sorting/Export preferences,Olaf Lenz,olenz,"03/12/14, 11:38:01 AM","This will add a new ""File Sorting"" Tab to the preferences that allows to exactly speci...",,Olaf Lenz,"03/12/14, 09:16:45 AM",Merge branch 'sorting',1
1,2,Basic gradle integration,Simon Harrer,simonharrer,"03/12/14, 06:29:22 PM",This adds basic gradle integration. The project files for Intellij and Eclipse can be ...,,Basic gradle integration,Simon Harrer,simonharrer,"03/12/14, 06:29:22 PM",This adds basic gradle integration. The project files for Intellij and Eclipse can be ...,,Simon Harrer,"03/12/14, 05:20:08 PM",Gradle build works basically. Reuses existing folder structure.,1
2,3,Some example new Dutch translations via the GitHub web page.,Egon Willighagen,egonw,"03/15/14, 12:55:32 PM",,,Some example new Dutch translations via the GitHub web page.,Egon Willighagen,egonw,"03/15/14, 12:55:32 PM",,,Egon Willighagen,"03/15/14, 09:33:10 AM",Some example new Dutch translations via the GitHub web page.,1
3,4,Spanish translation update,Jorge Tornero,jtornero,"03/17/14, 10:20:50 PM",Three new strings translated.,"Even though the other translation files have been updated, I accept this pull request....",Spanish translation update,Jorge Tornero,jtornero,"03/17/14, 10:20:50 PM",Three new strings translated.,,jtornero,"03/17/14, 09:28:26 PM",Spanish translation update,1
4,5,Update JabRef_in.properties,,was123,"03/18/14, 05:12:53 AM",Indonesian translation added,,Update JabRef_in.properties,,was123,"03/18/14, 05:12:53 AM",Indonesian translation added,,was123,"03/18/14, 12:01:58 AM",Update JabRef_in.properties\n\nIndonesian translation added,1


In [9]:
def clean(issue):
    buffer = ""
    
    for char in issue:
        if char.isdigit():
            buffer += char
        else:
            break

    print( "cleaned:", buffer )
    return buffer


def searchIssue(text):
    
    # define vars
    issues = []
    prs = []
    i = 0
    
    
    if text == '' or (isinstance(text, float) and  math.isnan(text)):
        print("PR:",prnumber," title is NaN:", text)
        
    else:
        while ( i < len( text )):
            print("PR:",prnumber," searching from:",i,"limit:",len(text))
            
            if (prnumber == 2300):
                print ("Analyzing", text)
                
            resultTralha = text.find('#',i)
            
            if (prnumber == 2300):
                print ("tralha position", resultTralha)
                
            if (resultTralha != -1):
                next = resultTralha + 5 #issue number with 4 digits

                if (next < len(text)):
                    num = resultTralha + 1
                    numberIssue = text[resultTralha+1:next]
                    print("PR:",prnumber," Issue:",numberIssue)
                    numberIssue = clean(numberIssue)
                    
                    if numberIssue == '' or numberIssue == '333':
                        print("Issue discarded after clean",numberIssue)
                        
                    else:
                        print("Issue added",numberIssue)
                        issues.append(numberIssue)
                        prs.append(prnumber)
                        
                else:
                    next = resultTralha + 4 # issue number < 4
                    if (next < len(text)):
                        num = resultTralha+1
                        numberIssue = text[resultTralha+1:next]
                        print("PR:",prnumber," Issue:",numberIssue)
                        numberIssue = clean(numberIssue)
                        
                        if numberIssue == '' or numberIssue == '333':
                            print("Issue discarded after clean",numberIssue)
                            
                        else:
                            print("Issue added",numberIssue)
                            issues.append(numberIssue)
                    
                            prs.append(prnumber)
                        
                    else:
                        break
            else:
                break
                
            i=resultTralha+1
            
    return issues, prs


globalIssues = []
prsToLink = []
trainPRs = []
testPRs = []

for row in prIssues.itertuples(index=True, name='Pandas'):

    pr_title = row.PR_Title
    pr_body = row.PR_Body
    issue_number = row.Issue_Number
    ispr = row.isPR
    
    if (ispr==1):
        resList = []
        prsList = []
        resList, prsList = searchIssue(pr_title)
        
        if len(resList) !=0:
            globalIssues.extend(resList)
            prsToLink.extend(prsList)
            testPRs.extend(prsList)

            
        resList = []
        prsList = []
        
        #search body
        resList, prsList = searchIssue(pr_body)
        if len(resList) !=0:
            
            globalIssues.extend(resList)
            prsToLink.extend(prsList)
            testPRs.extend(prsList)
            
            

PR: 1  searching from: 0 limit: 30
PR: 1  searching from: 0 limit: 201
PR: 2  searching from: 0 limit: 24
PR: 2  searching from: 0 limit: 188
PR: 3  searching from: 0 limit: 60
PR: 3  title is NaN: nan
PR: 4  searching from: 0 limit: 26
PR: 4  searching from: 0 limit: 29
PR: 5  searching from: 0 limit: 27
PR: 5  searching from: 0 limit: 28
PR: 6  searching from: 0 limit: 20
PR: 6  title is NaN: nan
PR: 7  searching from: 0 limit: 56
PR: 7  Issue: 841
cleaned: 841
Issue added 841
PR: 7  searching from: 52 limit: 56
PR: 7  searching from: 0 limit: 941
PR: 8  searching from: 0 limit: 49
PR: 8  searching from: 0 limit: 2307
PR: 8  Issue: 869 
cleaned: 869
Issue added 869
PR: 8  searching from: 2217 limit: 2307
PR: 9  searching from: 0 limit: 34
PR: 9  searching from: 0 limit: 110
PR: 10  searching from: 0 limit: 43
PR: 10  searching from: 0 limit: 482
PR: 11  searching from: 0 limit: 42
PR: 11  Issue: 880 
cleaned: 880
Issue added 880
PR: 11  searching from: 17 limit: 42
PR: 11  searching 

PR: 371  searching from: 0 limit: 25
PR: 371  searching from: 0 limit: 47
PR: 373  searching from: 0 limit: 34
PR: 373  searching from: 0 limit: 139
PR: 374  searching from: 0 limit: 41
PR: 374  searching from: 0 limit: 163
PR: 375  searching from: 0 limit: 43
PR: 375  searching from: 0 limit: 173
PR: 377  searching from: 0 limit: 32
PR: 377  searching from: 0 limit: 79
PR: 378  searching from: 0 limit: 20
PR: 378  searching from: 0 limit: 159
PR: 379  searching from: 0 limit: 31
PR: 379  searching from: 0 limit: 1761
PR: 379  Issue:  nov
cleaned: 
Issue discarded after clean 
PR: 379  searching from: 289 limit: 1761
PR: 379  Issue: nov#
cleaned: 
Issue discarded after clean 
PR: 379  searching from: 314 limit: 1761
PR: 379  Issue: `. I
cleaned: 
Issue discarded after clean 
PR: 379  searching from: 318 limit: 1761
PR: 380  searching from: 0 limit: 36
PR: 380  searching from: 0 limit: 369
PR: 381  searching from: 0 limit: 52
PR: 381  searching from: 0 limit: 224
PR: 387  searching from

PR: 1041  searching from: 0 limit: 51
PR: 1041  searching from: 0 limit: 489
PR: 1041  Issue: 963.
cleaned: 963
Issue added 963
PR: 1041  searching from: 20 limit: 489
PR: 1042  searching from: 0 limit: 14
PR: 1042  searching from: 0 limit: 336
PR: 1045  searching from: 0 limit: 36
PR: 1045  searching from: 0 limit: 260
PR: 1046  searching from: 0 limit: 19
PR: 1046  searching from: 0 limit: 460
PR: 1046  Issue: # 
-
cleaned: 
Issue discarded after clean 
PR: 1046  searching from: 339 limit: 460
PR: 1046  Issue:  
- 
cleaned: 
Issue discarded after clean 
PR: 1046  searching from: 340 limit: 460
PR: 1047  searching from: 0 limit: 28
PR: 1047  searching from: 0 limit: 533
PR: 1048  searching from: 0 limit: 44
PR: 1048  searching from: 0 limit: 167
PR: 1050  searching from: 0 limit: 45
PR: 1050  Issue: 103
cleaned: 103
Issue added 103
PR: 1050  searching from: 41 limit: 45
PR: 1050  searching from: 0 limit: 615
PR: 1052  searching from: 0 limit: 10
PR: 1052  searching from: 0 limit: 483


PR: 1526  searching from: 238 limit: 727
PR: 1529  searching from: 0 limit: 79
PR: 1529  searching from: 0 limit: 79
PR: 1532  searching from: 0 limit: 44
PR: 1532  searching from: 0 limit: 366
PR: 1535  searching from: 0 limit: 26
PR: 1535  searching from: 0 limit: 11
PR: 1535  Issue: 1533
cleaned: 1533
Issue added 1533
PR: 1535  searching from: 6 limit: 11
PR: 1537  searching from: 0 limit: 25
PR: 1537  searching from: 0 limit: 80
PR: 1539  searching from: 0 limit: 66
PR: 1539  Issue: 1500
cleaned: 1500
Issue added 1500
PR: 1539  searching from: 5 limit: 66
PR: 1539  searching from: 0 limit: 296
PR: 1541  searching from: 0 limit: 78
PR: 1541  searching from: 0 limit: 356
PR: 1544  searching from: 0 limit: 52
PR: 1544  searching from: 0 limit: 256
PR: 1545  searching from: 0 limit: 55
PR: 1545  Issue: 148
cleaned: 148
Issue added 148
PR: 1545  searching from: 51 limit: 55
PR: 1545  searching from: 0 limit: 122
PR: 1545  Issue: 1484
cleaned: 1484
Issue added 1484
PR: 1545  searching fr

PR: 1925  searching from: 0 limit: 33
PR: 1925  searching from: 0 limit: 917
PR: 1925  Issue: 1885
cleaned: 1885
Issue added 1885
PR: 1925  searching from: 178 limit: 917
PR: 1925  Issue: 1923
cleaned: 1923
Issue added 1923
PR: 1925  searching from: 209 limit: 917
PR: 1926  searching from: 0 limit: 54
PR: 1926  searching from: 0 limit: 107
PR: 1926  Issue: 1922
cleaned: 1922
Issue added 1922
PR: 1926  searching from: 7 limit: 107
PR: 1927  searching from: 0 limit: 19
PR: 1927  searching from: 0 limit: 83
PR: 1928  searching from: 0 limit: 22
PR: 1928  searching from: 0 limit: 1620
PR: 1928  Issue: 119.
cleaned: 119
Issue added 119
PR: 1928  searching from: 12 limit: 1620
PR: 1928  Issue: ## P
cleaned: 
Issue discarded after clean 
PR: 1928  searching from: 271 limit: 1620
PR: 1928  Issue: # Pr
cleaned: 
Issue discarded after clean 
PR: 1928  searching from: 272 limit: 1620
PR: 1928  Issue:  Pre
cleaned: 
Issue discarded after clean 
PR: 1928  searching from: 273 limit: 1620
PR: 1928  I

PR: 2291  searching from: 0 limit: 623
PR: 2295  searching from: 0 limit: 30
PR: 2295  searching from: 0 limit: 521
PR: 2295  Issue: 2294
cleaned: 2294
Issue added 2294
PR: 2295  searching from: 433 limit: 521
PR: 2296  searching from: 0 limit: 45
PR: 2296  searching from: 0 limit: 542
PR: 2297  searching from: 0 limit: 57
PR: 2297  searching from: 0 limit: 652
PR: 2300  searching from: 0 limit: 49
Analyzing Fix local metadata synchronization (fixes: #2219)
tralha position 43
PR: 2300  Issue: 2219
cleaned: 2219
Issue added 2219
PR: 2300  searching from: 44 limit: 49
Analyzing Fix local metadata synchronization (fixes: #2219)
tralha position -1
PR: 2300  searching from: 0 limit: 398
Analyzing Issue: https://github.com/JabRef/jabref/issues/2219.

This issue is fixed by overloading and using the `parse(...)` method in `MetaDataParser`.

The reason was a reset of the local field `metaData` in `DBMSSynchronizer`. This caused other instances containing this field not to be up-to-date.


Issue added 2803
PR: 2818  searching from: 154 limit: 240
PR: 2819  searching from: 0 limit: 51
PR: 2819  searching from: 0 limit: 355
PR: 2824  searching from: 0 limit: 42
PR: 2824  searching from: 0 limit: 597
PR: 2826  searching from: 0 limit: 56
PR: 2826  searching from: 0 limit: 577
PR: 2826  Issue: 2825
cleaned: 2825
Issue added 2825
PR: 2826  searching from: 41 limit: 577
PR: 2827  searching from: 0 limit: 58
PR: 2827  searching from: 0 limit: 473
PR: 2835  searching from: 0 limit: 17
PR: 2835  searching from: 0 limit: 502
PR: 2836  searching from: 0 limit: 22
PR: 2836  searching from: 0 limit: 512
PR: 2840  searching from: 0 limit: 58
PR: 2840  searching from: 0 limit: 723
PR: 2841  searching from: 0 limit: 91
PR: 2841  searching from: 0 limit: 63
PR: 2844  searching from: 0 limit: 45
PR: 2844  searching from: 0 limit: 810
PR: 2845  searching from: 0 limit: 51
PR: 2845  Issue: 2786
cleaned: 2786
Issue added 2786
PR: 2845  searching from: 11 limit: 51
PR: 2845  searching from: 0

Issue discarded after clean 
PR: 3433  searching from: 2008 limit: 2227
PR: 3438  searching from: 0 limit: 60
PR: 3438  searching from: 0 limit: 368
PR: 3438  Issue: 3432
cleaned: 3432
Issue added 3432
PR: 3438  searching from: 346 limit: 368
PR: 3439  searching from: 0 limit: 36
PR: 3439  searching from: 0 limit: 563
PR: 3441  searching from: 0 limit: 58
PR: 3441  searching from: 0 limit: 690
PR: 3441  Issue: 3434
cleaned: 3434
Issue added 3434
PR: 3441  searching from: 12 limit: 690
PR: 3442  searching from: 0 limit: 29
PR: 3442  searching from: 0 limit: 473
PR: 3443  searching from: 0 limit: 19
PR: 3443  Issue: 338
cleaned: 338
Issue added 338
PR: 3443  searching from: 15 limit: 19
PR: 3443  searching from: 0 limit: 3401
PR: 3443  Issue: 3381
cleaned: 3381
Issue added 3381
PR: 3443  searching from: 208 limit: 3401
PR: 3443  Issue: 3381
cleaned: 3381
Issue added 3381
PR: 3443  searching from: 2821 limit: 3401
PR: 3444  searching from: 0 limit: 41
PR: 3444  Issue: 1664
cleaned: 1664
I

PR: 3880  searching from: 114 limit: 1926
PR: 3880  Issue: 49](
cleaned: 49
Issue added 49
PR: 3880  searching from: 175 limit: 1926
PR: 3882  searching from: 0 limit: 16
PR: 3882  searching from: 0 limit: 527
PR: 3883  searching from: 0 limit: 49
PR: 3883  searching from: 0 limit: 1026
PR: 3883  Issue: 333]
cleaned: 333
Issue discarded after clean 333
PR: 3883  searching from: 114 limit: 1026
PR: 3883  Issue: 49](
cleaned: 49
Issue added 49
PR: 3883  searching from: 175 limit: 1026
PR: 3884  searching from: 0 limit: 29
PR: 3884  searching from: 0 limit: 889
PR: 3884  Issue: 333]
cleaned: 333
Issue discarded after clean 333
PR: 3884  searching from: 114 limit: 889
PR: 3884  Issue: 49](
cleaned: 49
Issue added 49
PR: 3884  searching from: 175 limit: 889
PR: 3886  searching from: 0 limit: 12
PR: 3886  searching from: 0 limit: 1106
PR: 3886  Issue: 333]
cleaned: 333
Issue discarded after clean 333
PR: 3886  searching from: 114 limit: 1106
PR: 3886  Issue: 49](
cleaned: 49
Issue added 49
P

Issue discarded after clean 333
PR: 4286  searching from: 213 limit: 906
PR: 4286  Issue: 49](
cleaned: 49
Issue added 49
PR: 4286  searching from: 274 limit: 906
PR: 4289  searching from: 0 limit: 31
PR: 4289  searching from: 0 limit: 865
PR: 4289  Issue: 4233
cleaned: 4233
Issue added 4233
PR: 4289  searching from: 7 limit: 865
PR: 4289  Issue: 333]
cleaned: 333
Issue discarded after clean 333
PR: 4289  searching from: 172 limit: 865
PR: 4289  Issue: 49](
cleaned: 49
Issue added 49
PR: 4289  searching from: 233 limit: 865
PR: 4290  searching from: 0 limit: 24
PR: 4290  searching from: 0 limit: 849
PR: 4290  Issue: 333]
cleaned: 333
Issue discarded after clean 333
PR: 4290  searching from: 156 limit: 849
PR: 4290  Issue: 49](
cleaned: 49
Issue added 49
PR: 4290  searching from: 217 limit: 849
PR: 4291  searching from: 0 limit: 47
PR: 4291  searching from: 0 limit: 1212
PR: 4291  Issue: 4072
cleaned: 4072
Issue added 4072
PR: 4291  searching from: 7 limit: 1212
PR: 4291  Issue: 333]
cl

Issue added 785
PR: 4541  searching from: 2434 limit: 5726
PR: 4541  Issue: 778]
cleaned: 778
Issue added 778
PR: 4541  searching from: 2717 limit: 5726
PR: 4541  Issue: 777]
cleaned: 777
Issue added 777
PR: 4541  searching from: 2949 limit: 5726
PR: 4541  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4541  searching from: 3736 limit: 5726
PR: 4541  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4541  searching from: 3773 limit: 5726
PR: 4542  searching from: 0 limit: 39
PR: 4542  searching from: 0 limit: 9574
PR: 4542  Issue: # AN
cleaned: 
Issue discarded after clean 
PR: 4542  searching from: 211 limit: 9574
PR: 4542  Issue:  ANT
cleaned: 
Issue discarded after clean 
PR: 4542  searching from: 212 limit: 9574
PR: 4542  Issue: # Is
cleaned: 
Issue discarded after clean 
PR: 4542  searching from: 228 limit: 9574
PR: 4542  Issue:  Iss
cleaned: 
Issue discarded after clean 
PR: 4542  searching from: 229 limit: 9574
PR: 4542  Issue: # Im
cleaned: 
Issue discarded after c

PR: 4663  searching from: 1363 limit: 4790
PR: 4663  Issue: 1600
cleaned: 1600
Issue added 1600
PR: 4663  searching from: 1583 limit: 4790
PR: 4663  Issue: 1617
cleaned: 1617
Issue added 1617
PR: 4663  searching from: 1977 limit: 4790
PR: 4663  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4663  searching from: 2710 limit: 4790
PR: 4663  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4663  searching from: 2747 limit: 4790
PR: 4675  searching from: 0 limit: 46
PR: 4675  searching from: 0 limit: 2849
PR: 4675  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4675  searching from: 769 limit: 2849
PR: 4675  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4675  searching from: 806 limit: 2849
PR: 4692  searching from: 0 limit: 41
PR: 4692  searching from: 0 limit: 1765
PR: 4692  Issue: 333]
cleaned: 333
Issue discarded after clean 333
PR: 4692  searching from: 114 limit: 1765
PR: 4692  Issue: 49](
cleaned: 49
Issue added 49
PR: 4692  searching from: 175 limit: 

cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 228 limit: 10729
PR: 4777  Issue:  Arc
cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 229 limit: 10729
PR: 4777  Issue:  Bug
cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 249 limit: 10729
PR: 4777  Issue: # Ar
cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 404 limit: 10729
PR: 4777  Issue:  Arc
cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 405 limit: 10729
PR: 4777  Issue:  Bre
cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 425 limit: 10729
PR: 4777  Issue:  New
cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 909 limit: 10729
PR: 4777  Issue:  Enh
cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 2198 limit: 10729
PR: 4777  Issue: # Co
cleaned: 
Issue discarded after clean 
PR: 4777  searching from: 2220 limit: 10729
PR: 4777  Issue:  Cor
cleaned: 
Issue discarded after clean 
PR: 4777  searching fro

PR: 4899  Issue: 810]
cleaned: 810
Issue added 810
PR: 4899  searching from: 1485 limit: 5237
PR: 4899  Issue: 810]
cleaned: 810
Issue added 810
PR: 4899  searching from: 1906 limit: 5237
PR: 4899  Issue: 804]
cleaned: 804
Issue added 804
PR: 4899  searching from: 2270 limit: 5237
PR: 4899  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4899  searching from: 3135 limit: 5237
PR: 4899  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4899  searching from: 3172 limit: 5237
PR: 4900  searching from: 0 limit: 30
PR: 4900  searching from: 0 limit: 2655
PR: 4900  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4900  searching from: 553 limit: 2655
PR: 4900  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4900  searching from: 590 limit: 2655
PR: 4901  searching from: 0 limit: 29
PR: 4901  searching from: 0 limit: 2842
PR: 4901  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 4901  searching from: 7 limit: 2842
PR: 4901  Issue:  (de
cleaned: 
Issue discarded

PR: 5067  searching from: 247 limit: 7398
PR: 5067  Issue: # Kn
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 248 limit: 7398
PR: 5067  Issue:  Kno
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 249 limit: 7398
PR: 5067  Issue: ## C
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 730 limit: 7398
PR: 5067  Issue: # Ch
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 731 limit: 7398
PR: 5067  Issue:  Cha
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 732 limit: 7398
PR: 5067  Issue: getT
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 952 limit: 7398
PR: 5067  Issue: ## A
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 1042 limit: 7398
PR: 5067  Issue: # Ad
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 1043 limit: 7398
PR: 5067  Issue:  Add
cleaned: 
Issue discarded after clean 
PR: 5067  searching from: 1044 limit: 7398
PR: 5067  Issue: getF
cleane

PR: 5150  searching from: 0 limit: 746
PR: 5150  Issue: 4629
cleaned: 4629
Issue added 4629
PR: 5150  searching from: 5 limit: 746
PR: 5150  Issue: 4629
cleaned: 4629
Issue added 4629
PR: 5150  searching from: 111 limit: 746
PR: 5153  searching from: 0 limit: 33
PR: 5153  searching from: 0 limit: 5058
PR: 5153  Issue: # ch
cleaned: 
Issue discarded after clean 
PR: 5153  searching from: 219 limit: 5058
PR: 5153  Issue:  che
cleaned: 
Issue discarded after clean 
PR: 5153  searching from: 220 limit: 5058
PR: 5153  Issue: Rele
cleaned: 
Issue discarded after clean 
PR: 5153  searching from: 280 limit: 5058
PR: 5153  Issue: 6910
cleaned: 6910
Issue added 6910
PR: 5153  searching from: 1378 limit: 5058
PR: 5153  Issue: 6805
cleaned: 6805
Issue added 6805
PR: 5153  searching from: 1616 limit: 5058
PR: 5153  Issue: 5008
cleaned: 5008
Issue added 5008
PR: 5153  searching from: 1868 limit: 5058
PR: 5153  Issue: 3383
cleaned: 3383
Issue added 3383
PR: 5153  searching from: 2093 limit: 5058
PR: 

Issue added 5124
PR: 5268  searching from: 1394 limit: 4943
PR: 5268  Issue: 7021
cleaned: 7021
Issue added 7021
PR: 5268  searching from: 1912 limit: 4943
PR: 5268  Issue: 2116
cleaned: 2116
Issue added 2116
PR: 5268  searching from: 2136 limit: 4943
PR: 5268  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5268  searching from: 2971 limit: 4943
PR: 5268  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5268  searching from: 3090 limit: 4943
PR: 5269  searching from: 0 limit: 42
PR: 5269  searching from: 0 limit: 2529
PR: 5269  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5269  searching from: 557 limit: 2529
PR: 5269  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5269  searching from: 676 limit: 2529
PR: 5271  searching from: 0 limit: 36
PR: 5271  searching from: 0 limit: 4334
PR: 5271  Issue: # 28
cleaned: 
Issue discarded after clean 
PR: 5271  searching from: 199 limit: 4334
PR: 5271  Issue:  28.
cleaned: 
Issue discarded after clean 
PR: 5271  sear

PR: 5391  searching from: 434 limit: 1066
PR: 5392  searching from: 0 limit: 47
PR: 5392  searching from: 0 limit: 601
PR: 5392  Issue: 5359
cleaned: 5359
Issue added 5359
PR: 5392  searching from: 7 limit: 601
PR: 5393  searching from: 0 limit: 18
PR: 5393  searching from: 0 limit: 1082
PR: 5393  Issue: 5364
cleaned: 5364
Issue added 5364
PR: 5393  searching from: 477 limit: 1082
PR: 5394  searching from: 0 limit: 34
PR: 5394  searching from: 0 limit: 893
PR: 5394  Issue: 333]
cleaned: 333
Issue discarded after clean 333
PR: 5394  searching from: 200 limit: 893
PR: 5394  Issue: 49](
cleaned: 49
Issue added 49
PR: 5394  searching from: 261 limit: 893
PR: 5395  searching from: 0 limit: 34
PR: 5395  searching from: 0 limit: 1082
PR: 5395  Issue: 5169
cleaned: 5169
Issue added 5169
PR: 5395  searching from: 266 limit: 1082
PR: 5395  Issue: 333]
cleaned: 333
Issue discarded after clean 333
PR: 5395  searching from: 389 limit: 1082
PR: 5395  Issue: 49](
cleaned: 49
Issue added 49
PR: 5395  

PR: 5567  searching from: 308 limit: 3777
PR: 5567  Issue: 4381
cleaned: 4381
Issue added 4381
PR: 5567  searching from: 534 limit: 3777
PR: 5567  Issue: 4366
cleaned: 4366
Issue added 4366
PR: 5567  searching from: 783 limit: 3777
PR: 5567  Issue: 4378
cleaned: 4378
Issue added 4378
PR: 5567  searching from: 1014 limit: 3777
PR: 5567  Issue: 4379
cleaned: 4379
Issue added 4379
PR: 5567  searching from: 1243 limit: 3777
PR: 5567  Issue: 4380
cleaned: 4380
Issue added 4380
PR: 5567  searching from: 1502 limit: 3777
PR: 5567  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5567  searching from: 1962 limit: 3777
PR: 5567  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5567  searching from: 2081 limit: 3777
PR: 5568  searching from: 0 limit: 63
PR: 5568  searching from: 0 limit: 2570
PR: 5568  Issue: 199]
cleaned: 199
Issue added 199
PR: 5568  searching from: 295 limit: 2570
PR: 5568  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5568  searching from: 755 limit: 257

cleaned: 
Issue discarded after clean 
PR: 5695  searching from: 691 limit: 5045
PR: 5695  Issue:  28.
cleaned: 
Issue discarded after clean 
PR: 5695  searching from: 692 limit: 5045
PR: 5695  Issue: 782]
cleaned: 782
Issue added 782
PR: 5695  searching from: 1674 limit: 5045
PR: 5695  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5695  searching from: 3230 limit: 5045
PR: 5695  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 5695  searching from: 3349 limit: 5045
PR: 5696  searching from: 0 limit: 39
PR: 5696  searching from: 0 limit: 7075
PR: 5696  Issue: # 3.
cleaned: 
Issue discarded after clean 
PR: 5696  searching from: 232 limit: 7075
PR: 5696  Issue:  3.2
cleaned: 
Issue discarded after clean 
PR: 5696  searching from: 233 limit: 7075
PR: 5696  Issue: 319]
cleaned: 319
Issue added 319
PR: 5696  searching from: 534 limit: 7075
PR: 5696  Issue: 321]
cleaned: 321
Issue added 321
PR: 5696  searching from: 701 limit: 7075
PR: 5696  Issue: # 3.
cleaned: 
Issue discar

PR: 5865  Issue: L40
cleaned: 
Issue discarded after clean 
PR: 5865  searching from: 1496 limit: 2677
PR: 5865  Issue: L159
cleaned: 
Issue discarded after clean 
PR: 5865  searching from: 1684 limit: 2677
PR: 5865  Issue: 333.
cleaned: 333
Issue discarded after clean 333
PR: 5865  searching from: 1883 limit: 2677
PR: 5865  Issue: 47](
cleaned: 47
Issue added 47
PR: 5865  searching from: 1958 limit: 2677
PR: 5868  searching from: 0 limit: 10
PR: 5868  Issue: 5862
cleaned: 5862
Issue added 5862
PR: 5868  searching from: 5 limit: 10
PR: 5868  searching from: 0 limit: 972
PR: 5868  Issue: 333.
cleaned: 333
Issue discarded after clean 333
PR: 5868  searching from: 178 limit: 972
PR: 5868  Issue: 47](
cleaned: 47
Issue added 47
PR: 5868  searching from: 253 limit: 972
PR: 5869  searching from: 0 limit: 48
PR: 5869  searching from: 0 limit: 1061
PR: 5869  Issue: 4877
cleaned: 4877
Issue added 4877
PR: 5869  searching from: 90 limit: 1061
PR: 5869  Issue: 333.
cleaned: 333
Issue discarded a

cleaned: 333
Issue discarded after clean 333
PR: 6072  searching from: 110 limit: 904
PR: 6072  Issue: 47](
cleaned: 47
Issue added 47
PR: 6072  searching from: 185 limit: 904
PR: 6075  searching from: 0 limit: 24
PR: 6075  title is NaN: nan
PR: 6079  searching from: 0 limit: 24
PR: 6079  title is NaN: nan
PR: 6080  searching from: 0 limit: 41
PR: 6080  searching from: 0 limit: 223
PR: 6080  Issue: 6056
cleaned: 6056
Issue added 6056
PR: 6080  searching from: 218 limit: 223
PR: 6082  searching from: 0 limit: 41
PR: 6082  searching from: 0 limit: 8081
PR: 6082  Issue: 162<
cleaned: 162
Issue added 162
PR: 6082  searching from: 754 limit: 8081
PR: 6082  Issue: 171<
cleaned: 171
Issue added 171
PR: 6082  searching from: 1162 limit: 8081
PR: 6082  Issue: 166<
cleaned: 166
Issue added 166
PR: 6082  searching from: 1400 limit: 8081
PR: 6082  Issue: 29</
cleaned: 29
Issue added 29
PR: 6082  searching from: 1676 limit: 8081
PR: 6082  Issue: 29</
cleaned: 29
Issue added 29
PR: 6082  searching f

PR: 6315  searching from: 0 limit: 1248
PR: 6315  Issue: 4373
cleaned: 4373
Issue added 4373
PR: 6315  searching from: 7 limit: 1248
PR: 6315  Issue: 5230
cleaned: 5230
Issue added 5230
PR: 6315  searching from: 143 limit: 1248
PR: 6315  Issue: 333"
cleaned: 333
Issue discarded after clean 333
PR: 6315  searching from: 345 limit: 1248
PR: 6316  searching from: 0 limit: 64
PR: 6316  searching from: 0 limit: 2818
PR: 6316  Issue: 333"
cleaned: 333
Issue discarded after clean 333
PR: 6316  searching from: 106 limit: 2818
PR: 6316  Issue: L33
cleaned: 
Issue discarded after clean 
PR: 6316  searching from: 654 limit: 2818
PR: 6316  Issue: L50
cleaned: 
Issue discarded after clean 
PR: 6316  searching from: 794 limit: 2818
PR: 6318  searching from: 0 limit: 36
PR: 6318  searching from: 0 limit: 4204
PR: 6318  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6318  searching from: 2380 limit: 4204
PR: 6318  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6318  searching from: 2

PR: 6516  searching from: 557 limit: 1512
PR: 6518  searching from: 0 limit: 31
PR: 6518  searching from: 0 limit: 702
PR: 6518  Issue: 6509
cleaned: 6509
Issue added 6509
PR: 6518  searching from: 14 limit: 702
PR: 6518  Issue: 6515
cleaned: 6515
Issue added 6515
PR: 6518  searching from: 194 limit: 702
PR: 6519  searching from: 0 limit: 33
PR: 6519  searching from: 0 limit: 22667
PR: 6520  searching from: 0 limit: 54
PR: 6520  searching from: 0 limit: 2662
PR: 6520  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6520  searching from: 838 limit: 2662
PR: 6520  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6520  searching from: 966 limit: 2662
PR: 6521  searching from: 0 limit: 37
PR: 6521  searching from: 0 limit: 2611
PR: 6521  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6521  searching from: 787 limit: 2611
PR: 6521  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6521  searching from: 915 limit: 2611
PR: 6522  searching from: 0 limit: 59
PR: 6522 

PR: 6682  searching from: 143 limit: 651
PR: 6683  searching from: 0 limit: 38
PR: 6683  searching from: 0 limit: 573
PR: 6687  searching from: 0 limit: 67
PR: 6687  searching from: 0 limit: 1153
PR: 6689  searching from: 0 limit: 30
PR: 6689  searching from: 0 limit: 689
PR: 6693  searching from: 0 limit: 16
PR: 6693  searching from: 0 limit: 63
PR: 6693  Issue: 6692
cleaned: 6692
Issue added 6692
PR: 6693  searching from: 7 limit: 63
PR: 6695  searching from: 0 limit: 33
PR: 6695  searching from: 0 limit: 2391
PR: 6695  Issue:  Tes
cleaned: 
Issue discarded after clean 
PR: 6695  searching from: 1 limit: 2391
PR: 6695  Issue: # Co
cleaned: 
Issue discarded after clean 
PR: 6695  searching from: 43 limit: 2391
PR: 6695  Issue:  Con
cleaned: 
Issue discarded after clean 
PR: 6695  searching from: 44 limit: 2391
PR: 6695  Issue: # De
cleaned: 
Issue discarded after clean 
PR: 6695  searching from: 412 limit: 2391
PR: 6695  Issue:  Dec
cleaned: 
Issue discarded after clean 
PR: 6695  sea

cleaned: 
Issue discarded after clean 
PR: 6810  searching from: 3463 limit: 4255
PR: 6810  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6810  searching from: 3500 limit: 4255
PR: 6811  searching from: 0 limit: 47
PR: 6811  searching from: 0 limit: 1342
PR: 6811  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6811  searching from: 550 limit: 1342
PR: 6811  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 6811  searching from: 587 limit: 1342
PR: 6812  searching from: 0 limit: 37
PR: 6812  searching from: 0 limit: 4690
PR: 6812  Issue: filt
cleaned: 
Issue discarded after clean 
PR: 6812  searching from: 364 limit: 4690
PR: 6812  Issue: 468<
cleaned: 468
Issue added 468
PR: 6812  searching from: 511 limit: 4690
PR: 6812  Issue: 468<
cleaned: 468
Issue added 468
PR: 6812  searching from: 879 limit: 4690
PR: 6812  Issue: 468<
cleaned: 468
Issue added 468
PR: 6812  searching from: 2087 limit: 4690
PR: 6812  Issue: 470<
cleaned: 470
Issue added 470
PR: 6812  searchin

PR: 7026  Issue: 6891
cleaned: 6891
Issue added 6891
PR: 7026  searching from: 207 limit: 2348
PR: 7027  searching from: 0 limit: 39
PR: 7027  searching from: 0 limit: 2660
PR: 7027  Issue: 1920
cleaned: 1920
Issue added 1920
PR: 7027  searching from: 505 limit: 2660
PR: 7027  Issue: 1929
cleaned: 1929
Issue added 1929
PR: 7027  searching from: 998 limit: 2660
PR: 7027  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7027  searching from: 1868 limit: 2660
PR: 7027  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7027  searching from: 1905 limit: 2660
PR: 7028  searching from: 0 limit: 48
PR: 7028  searching from: 0 limit: 1344
PR: 7028  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7028  searching from: 552 limit: 1344
PR: 7028  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7028  searching from: 589 limit: 1344
PR: 7029  searching from: 0 limit: 41
PR: 7029  searching from: 0 limit: 3434
PR: 7029  Issue: 373<
cleaned: 373
Issue added 373
PR: 7029  search

PR: 7185  searching from: 76 limit: 81
PR: 7185  searching from: 0 limit: 521
PR: 7185  Issue: 7083
cleaned: 7083
Issue added 7083
PR: 7185  searching from: 82 limit: 521
PR: 7185  Issue: 7093
cleaned: 7093
Issue added 7093
PR: 7185  searching from: 104 limit: 521
PR: 7187  searching from: 0 limit: 72
PR: 7187  searching from: 0 limit: 1423
PR: 7187  Issue: abou
cleaned: 
Issue discarded after clean 
PR: 7187  searching from: 428 limit: 1423
PR: 7187  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7187  searching from: 631 limit: 1423
PR: 7187  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7187  searching from: 668 limit: 1423
PR: 7188  searching from: 0 limit: 37
PR: 7188  searching from: 0 limit: 3990
PR: 7188  Issue: exte
cleaned: 
Issue discarded after clean 
PR: 7188  searching from: 313 limit: 3990
PR: 7188  Issue: 486<
cleaned: 486
Issue added 486
PR: 7188  searching from: 565 limit: 3990
PR: 7188  Issue: getS
cleaned: 
Issue discarded after clean 
PR: 7188  sea

Issue discarded after clean 
PR: 7338  searching from: 1836 limit: 4257
PR: 7338  Issue: ### 
cleaned: 
Issue discarded after clean 
PR: 7338  searching from: 2202 limit: 4257
PR: 7338  Issue: ## C
cleaned: 
Issue discarded after clean 
PR: 7338  searching from: 2203 limit: 4257
PR: 7338  Issue: # Ch
cleaned: 
Issue discarded after clean 
PR: 7338  searching from: 2204 limit: 4257
PR: 7338  Issue:  Che
cleaned: 
Issue discarded after clean 
PR: 7338  searching from: 2205 limit: 4257
PR: 7338  Issue: getP
cleaned: 
Issue discarded after clean 
PR: 7338  searching from: 2368 limit: 4257
PR: 7338  Issue: issu
cleaned: 
Issue discarded after clean 
PR: 7338  searching from: 3911 limit: 4257
PR: 7339  searching from: 0 limit: 53
PR: 7339  searching from: 0 limit: 514
PR: 7347  searching from: 0 limit: 33
PR: 7347  searching from: 0 limit: 810
PR: 7347  Issue: 7043
cleaned: 7043
Issue added 7043
PR: 7347  searching from: 26 limit: 810
PR: 7348  searching from: 0 limit: 35
PR: 7348  searching

PR: 7479  searching from: 6077 limit: 9642
PR: 7479  Issue: 2205
cleaned: 2205
Issue added 2205
PR: 7479  searching from: 6326 limit: 9642
PR: 7479  Issue: 2194
cleaned: 2194
Issue added 2194
PR: 7479  searching from: 6570 limit: 9642
PR: 7479  Issue: 2198
cleaned: 2198
Issue added 2198
PR: 7479  searching from: 6835 limit: 9642
PR: 7479  Issue: 2199
cleaned: 2199
Issue added 2199
PR: 7479  searching from: 7098 limit: 9642
PR: 7479  Issue: 2195
cleaned: 2195
Issue added 2195
PR: 7479  searching from: 7362 limit: 9642
PR: 7479  Issue: 2196
cleaned: 2196
Issue added 2196
PR: 7479  searching from: 7621 limit: 9642
PR: 7479  Issue: 2190
cleaned: 2190
Issue added 2190
PR: 7479  searching from: 7880 limit: 9642
PR: 7479  Issue: 2154
cleaned: 2154
Issue added 2154
PR: 7479  searching from: 8101 limit: 9642
PR: 7479  Issue: abou
cleaned: 
Issue discarded after clean 
PR: 7479  searching from: 8647 limit: 9642
PR: 7479  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7479  searching from

PR: 7646  Issue: 295<
cleaned: 295
Issue added 295
PR: 7646  searching from: 1125 limit: 6126
PR: 7646  Issue: 551<
cleaned: 551
Issue added 551
PR: 7646  searching from: 1534 limit: 6126
PR: 7646  Issue: 553<
cleaned: 553
Issue added 553
PR: 7646  searching from: 1752 limit: 6126
PR: 7646  Issue: _sof
cleaned: 
Issue discarded after clean 
PR: 7646  searching from: 2105 limit: 6126
PR: 7646  Issue: 572<
cleaned: 572
Issue added 572
PR: 7646  searching from: 2231 limit: 6126
PR: 7646  Issue: 572<
cleaned: 572
Issue added 572
PR: 7646  searching from: 3383 limit: 6126
PR: 7646  Issue: 571<
cleaned: 571
Issue added 571
PR: 7646  searching from: 4298 limit: 6126
PR: 7646  Issue: abou
cleaned: 
Issue discarded after clean 
PR: 7646  searching from: 5131 limit: 6126
PR: 7646  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7646  searching from: 5334 limit: 6126
PR: 7646  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7646  searching from: 5371 limit: 6126
PR: 7647  searching 

cleaned: 
Issue discarded after clean 
PR: 7800  searching from: 5198 limit: 6509
PR: 7800  Issue:  (de
cleaned: 
Issue discarded after clean 
PR: 7800  searching from: 5235 limit: 6509
PR: 7801  searching from: 0 limit: 39
PR: 7801  searching from: 0 limit: 9960
PR: 7801  Issue: 2313
cleaned: 2313
Issue added 2313
PR: 7801  searching from: 807 limit: 9960
PR: 7801  Issue: 2313
cleaned: 2313
Issue added 2313
PR: 7801  searching from: 909 limit: 9960
PR: 7801  Issue: 2312
cleaned: 2312
Issue added 2312
PR: 7801  searching from: 1051 limit: 9960
PR: 7801  Issue: 2312
cleaned: 2312
Issue added 2312
PR: 7801  searching from: 1153 limit: 9960
PR: 7801  Issue: 2310
cleaned: 2310
Issue added 2310
PR: 7801  searching from: 1300 limit: 9960
PR: 7801  Issue: 2310
cleaned: 2310
Issue added 2310
PR: 7801  searching from: 1402 limit: 9960
PR: 7801  Issue: 2309
cleaned: 2309
Issue added 2309
PR: 7801  searching from: 1559 limit: 9960
PR: 7801  Issue: 2309
cleaned: 2309
Issue added 2309
PR: 7801  sea

In [10]:
type(next)

builtin_function_or_method

In [11]:
type(prtitle)

str

In [12]:
testPRs

[7,
 8,
 11,
 12,
 12,
 14,
 14,
 17,
 26,
 28,
 28,
 28,
 28,
 28,
 29,
 29,
 36,
 38,
 39,
 42,
 52,
 54,
 137,
 137,
 143,
 143,
 165,
 175,
 175,
 182,
 183,
 187,
 189,
 200,
 204,
 204,
 211,
 211,
 219,
 230,
 258,
 287,
 300,
 300,
 304,
 319,
 320,
 330,
 347,
 352,
 357,
 370,
 391,
 396,
 418,
 429,
 429,
 440,
 452,
 452,
 452,
 452,
 452,
 452,
 452,
 459,
 472,
 476,
 491,
 500,
 515,
 560,
 567,
 568,
 569,
 594,
 594,
 595,
 595,
 600,
 602,
 605,
 605,
 626,
 648,
 660,
 663,
 677,
 677,
 677,
 677,
 683,
 690,
 694,
 694,
 711,
 715,
 716,
 718,
 718,
 718,
 718,
 718,
 739,
 752,
 755,
 793,
 799,
 828,
 842,
 842,
 844,
 853,
 853,
 858,
 864,
 874,
 874,
 877,
 879,
 879,
 884,
 887,
 893,
 893,
 903,
 915,
 925,
 926,
 929,
 937,
 942,
 947,
 949,
 956,
 966,
 966,
 974,
 979,
 988,
 994,
 994,
 996,
 999,
 999,
 999,
 1010,
 1013,
 1014,
 1020,
 1021,
 1041,
 1050,
 1054,
 1056,
 1057,
 1064,
 1079,
 1089,
 1100,
 1100,
 1100,
 1103,
 1116,
 1116,
 1116,
 1116,
 

In [13]:
globalIssues

['841',
 '869',
 '880',
 '959',
 '959',
 '960',
 '960',
 '850',
 '958',
 '762',
 '692',
 '685',
 '602',
 '225',
 '1240',
 '1240',
 '1253',
 '919',
 '1269',
 '876',
 '51',
 '1278',
 '1277',
 '136',
 '128',
 '1285',
 '164',
 '122',
 '122',
 '173',
 '101',
 '114',
 '188',
 '191',
 '318',
 '582',
 '114',
 '186',
 '173',
 '130',
 '244',
 '208',
 '290',
 '290',
 '298',
 '318',
 '301',
 '328',
 '346',
 '366',
 '1191',
 '367',
 '116',
 '370',
 '388',
 '420',
 '420',
 '427',
 '315',
 '384',
 '384',
 '443',
 '445',
 '498',
 '490',
 '458',
 '470',
 '473',
 '314',
 '495',
 '514',
 '116',
 '565',
 '564',
 '116',
 '593',
 '593',
 '545',
 '545',
 '599',
 '601',
 '337',
 '495',
 '608',
 '647',
 '650',
 '627',
 '669',
 '621',
 '669',
 '621',
 '667',
 '410',
 '212',
 '213',
 '520',
 '706',
 '116',
 '621',
 '669',
 '621',
 '669',
 '756',
 '738',
 '751',
 '741',
 '762',
 '789',
 '824',
 '492',
 '492',
 '685',
 '821',
 '60',
 '454',
 '859',
 '856',
 '815',
 '295',
 '466',
 '466',
 '883',
 '507',
 '402',
 '

In [14]:
prsToLink

[7,
 8,
 11,
 12,
 12,
 14,
 14,
 17,
 26,
 28,
 28,
 28,
 28,
 28,
 29,
 29,
 36,
 38,
 39,
 42,
 52,
 54,
 137,
 137,
 143,
 143,
 165,
 175,
 175,
 182,
 183,
 187,
 189,
 200,
 204,
 204,
 211,
 211,
 219,
 230,
 258,
 287,
 300,
 300,
 304,
 319,
 320,
 330,
 347,
 352,
 357,
 370,
 391,
 396,
 418,
 429,
 429,
 440,
 452,
 452,
 452,
 452,
 452,
 452,
 452,
 459,
 472,
 476,
 491,
 500,
 515,
 560,
 567,
 568,
 569,
 594,
 594,
 595,
 595,
 600,
 602,
 605,
 605,
 626,
 648,
 660,
 663,
 677,
 677,
 677,
 677,
 683,
 690,
 694,
 694,
 711,
 715,
 716,
 718,
 718,
 718,
 718,
 718,
 739,
 752,
 755,
 793,
 799,
 828,
 842,
 842,
 844,
 853,
 853,
 858,
 864,
 874,
 874,
 877,
 879,
 879,
 884,
 887,
 893,
 893,
 903,
 915,
 925,
 926,
 929,
 937,
 942,
 947,
 949,
 956,
 966,
 966,
 974,
 979,
 988,
 994,
 994,
 996,
 999,
 999,
 999,
 1010,
 1013,
 1014,
 1020,
 1021,
 1041,
 1050,
 1054,
 1056,
 1057,
 1064,
 1079,
 1089,
 1100,
 1100,
 1100,
 1103,
 1116,
 1116,
 1116,
 1116,
 

In [15]:
t = prIssues.loc[11:11,"PR_Title"]
b = prIssues.loc[11:11,"PR_Body"]

In [16]:
pd.options.display.max_seq_items = 2000
pd.options.display.max_colwidth = 90
pd.options.display.max_rows = 9999

In [17]:
t


11    BugFix for #959 "StringIndexOutOfBoundsException with invalid Preview text"
Name: PR_Title, dtype: object

In [18]:
prIssues.loc[prIssues['Issue_Number'] == 1, "PR_Body"]

0    This will add a new "File Sorting" Tab to the preferences that allows to exactly speci...
Name: PR_Body, dtype: object

In [19]:
newdf = prIssues.loc[953]

In [20]:
newdf


Issue_Number                                                                                                956
Issue_Title                                                                Make tabbedPane in mainframe private
Issue_Author_Name                                                                                   Tobias Diez
Issue_Author_Login                                                                                   tobiasdiez
Issue_Closed_Date                                                                         03/14/16, 08:54:08 AM
Issue_Body            Triggered by #931. \n- Makes tabbedPane in mainframe private\n- Set tab tooltips in co...
Issue_Comments                                                                                    :+1: \n =||= 
PR_Title                                                                   Make tabbedPane in mainframe private
PR_Author_Name                                                                                      Tobi

In [21]:
print(os.linesep.join([str(t), str(b)]))

11    BugFix for #959 "StringIndexOutOfBoundsException with invalid Preview text"
Name: PR_Title, dtype: object
11    Fixed #959 "StringIndexOutOfBoundsException with invalid Preview text". java.lang.Stri...
Name: PR_Body, dtype: object


In [22]:
newdf.to_csv(None)

',953\nIssue_Number,956\nIssue_Title,Make tabbedPane in mainframe private\nIssue_Author_Name,Tobias Diez\nIssue_Author_Login,tobiasdiez\nIssue_Closed_Date,"03/14/16, 08:54:08 AM"\nIssue_Body,"Triggered by #931. \n- Makes tabbedPane in mainframe private\n- Set tab tooltips in consistent way (for example they should now also show up after the tab list is sorted)\n## \n- [x] Change in CHANGELOG.md described? not worth it\n- [x] Changes in pull request outlined? (What, why, ...)\n- [x] Tests created for changes? well....\n- [x] Tests green?"\nIssue_Comments,":+1: \n =||= "\nPR_Title,Make tabbedPane in mainframe private\nPR_Author_Name,Tobias Diez\nPR_Author_Login,tobiasdiez\nPR_Closed_Date,"03/14/16, 08:54:08 AM"\nPR_Body,"Triggered by #931. \n- Makes tabbedPane in mainframe private\n- Set tab tooltips in consistent way (for example they should now also show up after the tab list is sorted)\n## \n- [x] Change in CHANGELOG.md described? not worth it\n- [x] Changes in pull request outlined? 

In [23]:
import numpy as np
i=0
line = []
dataLine = ""
dataLine = "PR, Issue, Issue_Title_Linked, Issue_Body_Linked, Issue_Comment_Linked, isTrain"
line.append((dataLine)) 
pr_ant = 0
issue_ant = 0

headerdf = ['PR', 'Issue', 'Issue_Title_Linked', 'Issue_Body_Linked', 'Issue_Comment_Linked', 'isTrain']

tup_line = []

while i < len(globalIssues):
#for issue in globalIssues: 
    issue = globalIssues[i]
    print ("processing...",issue)
    pr = prsToLink[i]
    #ispr = prIssues.loc[prIssues['PR_Number'] == issue, 'isPR'].values
    ispr = prIssues.loc[prIssues['Issue_Number'] == int(issue), 'isPR'].values
    #print(ispr)
    #df.query('PR_Number==3')['A']
    if (ispr==0) :
        if (issue!=issue_ant or pr!=pr_ant):
            issuetitle = prIssues.loc[prIssues['Issue_Number'] == int(issue), 'Issue_Title'].values
            issuebody = prIssues.loc[prIssues['Issue_Number'] == int(issue), 'Issue_Body'].values
            issuecomments = prIssues.loc[prIssues['Issue_Number'] == int(issue), 'Issue_Comments'].values
            isTrain = 0
            print("Issue:",issue, " linked from PR:", pr)

            dataLine =  str(pr) + "," + str(issue) + "," + str(issuetitle[0]) + "," + str(issuebody[0]) + "," + str(issuecomments[0]) + "," + str(isTrain)
            line.append((dataLine))
            #print(line)
            tup = ( str(pr) , str(issue) , str(issuetitle[0]) , str(issuebody[0]) , str(issuecomments[0]) , str(isTrain))
            list_tup = [ str(pr) , str(issue) , str(issuetitle[0]) , str(issuebody[0]) , str(issuecomments[0]) ,str(isTrain)]
            tup_line.append(tup)
        else:
             print("Issue:",issue, " repeated:", pr)   

    else:
        print("Issue:",issue, " was in fact an PR! pr base:", pr)
        
    i=i+1
    pr_ant = pr
    issue_ant = issue
    
    
#newFields2 = pd.DataFrame.from_records(data=[tup_line], columns=[headerdf])
newFields3 = pd.DataFrame(tup_line)  
 
newFields = pd.DataFrame(line)
newFields3.columns = ['PR', 'Issue', 'Issue_Title_Linked', 'Issue_Body_Linked', 'Issue_Comment_Linked', 'isTrain']

# create newFields output filepath
new_fields_csv = proj_fields_dir + "newFields.csv"
new_fields_txt = proj_fields_dir + "newFields.txt"

# create newFields3 output filepath
new_fields3_csv = proj_fields_dir + "newFields3.csv"
new_fields3_txt = proj_fields_dir + "newFields3.txt"

newFields.to_csv( new_fields_csv, encoding='utf-8', header=True, index=False, sep=',' )
np.savetxt( new_fields_txt, newFields.values, fmt='%s', delimiter=',' )

newFields3.to_csv( new_fields3_csv, encoding='utf-8', header=True, index=False, sep=',' )
np.savetxt( new_fields3_txt, newFields3.values, fmt='%s', delimiter=',')


processing... 841
Issue: 841  was in fact an PR! pr base: 7
processing... 869
Issue: 869  was in fact an PR! pr base: 8
processing... 880
Issue: 880  linked from PR: 11
processing... 959
Issue: 959  linked from PR: 12
processing... 959
Issue: 959  repeated: 12
processing... 960
Issue: 960  linked from PR: 14
processing... 960
Issue: 960  repeated: 14
processing... 850
Issue: 850  linked from PR: 17
processing... 958
Issue: 958  linked from PR: 26
processing... 762
Issue: 762  linked from PR: 28
processing... 692
Issue: 692  was in fact an PR! pr base: 28
processing... 685
Issue: 685  linked from PR: 28
processing... 602
Issue: 602  was in fact an PR! pr base: 28
processing... 225
Issue: 225  was in fact an PR! pr base: 28
processing... 1240
Issue: 1240  was in fact an PR! pr base: 29
processing... 1240
Issue: 1240  was in fact an PR! pr base: 29
processing... 1253
Issue: 1253  linked from PR: 36
processing... 919
Issue: 919  linked from PR: 38
processing... 1269
Issue: 1269  was in fac

Issue: 1631  was in fact an PR! pr base: 1640
processing... 163
Issue: 163  was in fact an PR! pr base: 1641
processing... 1639
Issue: 1639  linked from PR: 1641
processing... 1637
Issue: 1637  was in fact an PR! pr base: 1645
processing... 163
Issue: 163  was in fact an PR! pr base: 1649
processing... 1631
Issue: 1631  was in fact an PR! pr base: 1649
processing... 1630
Issue: 1630  was in fact an PR! pr base: 1652
processing... 1594
Issue: 1594  was in fact an PR! pr base: 1654
processing... 1594
Issue: 1594  was in fact an PR! pr base: 1656
processing... 898
Issue: 898  linked from PR: 1666
processing... 1665
Issue: 1665  linked from PR: 1666
processing... 1288
Issue: 1288  linked from PR: 1671
processing... 1669
Issue: 1669  linked from PR: 1678
processing... 1682
Issue: 1682  linked from PR: 1683
processing... 1682
Issue: 1682  repeated: 1683
processing... 1681
Issue: 1681  linked from PR: 1684
processing... 1609
Issue: 1609  linked from PR: 1712
processing... 1716
Issue: 1716  li

  if (ispr==0) :


Issue: 1825  linked from PR: 2227
processing... 2228
Issue: 2228  linked from PR: 2229
processing... 2200
Issue: 2200  linked from PR: 2230
processing... 2233
Issue: 2233  linked from PR: 2236
processing... 2239
Issue: 2239  linked from PR: 2240
processing... 2241
Issue: 2241  linked from PR: 2245
processing... 2249
Issue: 2249  linked from PR: 2262
processing... 684
Issue: 684  linked from PR: 2266
processing... 2279
Issue: 2279  linked from PR: 2281
processing... 2279
Issue: 2279  repeated: 2281
processing... 1359
Issue: 1359  linked from PR: 2283
processing... 2294
Issue: 2294  linked from PR: 2295
processing... 2219
Issue: 2219  linked from PR: 2300
processing... 2221
Issue: 2221  linked from PR: 2301
processing... 2068
Issue: 2068  was in fact an PR! pr base: 2301
processing... 2307
Issue: 2307  was in fact an PR! pr base: 2310
processing... 2311
Issue: 2311  linked from PR: 2314
processing... 2273
Issue: 2273  linked from PR: 2316
processing... 187
Issue: 187  was in fact an PR! 

Issue: 2704  was in fact an PR! pr base: 2716
processing... 2617
Issue: 2617  linked from PR: 2719
processing... 2671
Issue: 2671  linked from PR: 2720
processing... 2692
Issue: 2692  was in fact an PR! pr base: 2723
processing... 2722
Issue: 2722  linked from PR: 2727
processing... 268
Issue: 268  linked from PR: 2728
processing... 2701
Issue: 2701  linked from PR: 2732
processing... 2095
Issue: 2095  linked from PR: 2747
processing... 209
Issue: 209  linked from PR: 2748
processing... 2744
Issue: 2744  linked from PR: 2751
processing... 2739
Issue: 2739  linked from PR: 2751
processing... 2760
Issue: 2760  linked from PR: 2764
processing... 2766
Issue: 2766  linked from PR: 2774
processing... 2777
Issue: 2777  linked from PR: 2778
processing... 2762
Issue: 2762  linked from PR: 2793
processing... 2789
Issue: 2789  linked from PR: 2794
processing... 2789
Issue: 2789  repeated: 2794
processing... 2806
Issue: 2806  linked from PR: 2816
processing... 2803
Issue: 2803  linked from PR: 281

Issue: 49  was in fact an PR! pr base: 4007
processing... 3973
Issue: 3973  linked from PR: 4011
processing... 49
Issue: 49  was in fact an PR! pr base: 4012
processing... 4013
Issue: 4013  linked from PR: 4015
processing... 49
Issue: 49  was in fact an PR! pr base: 4015
processing... 4014
Issue: 4014  linked from PR: 4016
processing... 49
Issue: 49  was in fact an PR! pr base: 4016
processing... 4018
Issue: 4018  linked from PR: 4019
processing... 49
Issue: 49  was in fact an PR! pr base: 4019
processing... 49
Issue: 49  was in fact an PR! pr base: 4030
processing... 49
Issue: 49  was in fact an PR! pr base: 4031
processing... 3421
Issue: 3421  was in fact an PR! pr base: 4034
processing... 3421
Issue: 3421  was in fact an PR! pr base: 4035
processing... 49
Issue: 49  was in fact an PR! pr base: 4037
processing... 3419
Issue: 3419  linked from PR: 4040
processing... 49
Issue: 49  was in fact an PR! pr base: 4040
processing... 403
Issue: 403  was in fact an PR! pr base: 4047
processing

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


 4498
processing... 4113
Issue: 4113  linked from PR: 4498
processing... 1536
Issue: 1536  linked from PR: 4502
processing... 1523
Issue: 1523  linked from PR: 4502
processing... 1525
Issue: 1525  was in fact an PR! pr base: 4502
processing... 123
Issue: 123  linked from PR: 4503
processing... 122
Issue: 122  linked from PR: 4503
processing... 123
Issue: 123  linked from PR: 4504
processing... 122
Issue: 122  linked from PR: 4504
processing... 1600
Issue: 1600  linked from PR: 4510
processing... 1600
Issue: 1600  linked from PR: 4511
processing... 627
Issue: 627  linked from PR: 4512
processing... 774
Issue: 774  linked from PR: 4512
processing... 780
Issue: 780  was in fact an PR! pr base: 4512
processing... 779
Issue: 779  linked from PR: 4512
processing... 775
Issue: 775  was in fact an PR! pr base: 4512
processing... 785
Issue: 785  was in fact an PR! pr base: 4512
processing... 778
Issue: 778  was in fact an PR! pr base: 4512
processing... 784
Issue: 784  was in fact an PR! pr bas

Issue: 1612  linked from PR: 4723
processing... 1639
Issue: 1639  linked from PR: 4723
processing... 1638
Issue: 1638  was in fact an PR! pr base: 4723
processing... 1635
Issue: 1635  was in fact an PR! pr base: 4723
processing... 1619
Issue: 1619  was in fact an PR! pr base: 4723
processing... 1648
Issue: 1648  was in fact an PR! pr base: 4723
processing... 4655
Issue: 4655  linked from PR: 4725
processing... 49
Issue: 49  was in fact an PR! pr base: 4725
processing... 4306
Issue: 4306  linked from PR: 4727
processing... 4306
Issue: 4306  repeated: 4727
processing... 4728
Issue: 4728  linked from PR: 4729
processing... 49
Issue: 49  was in fact an PR! pr base: 4729
processing... 4584
Issue: 4584  linked from PR: 4730
processing... 49
Issue: 49  was in fact an PR! pr base: 4730
processing... 4724
Issue: 4724  linked from PR: 4731
processing... 49
Issue: 49  was in fact an PR! pr base: 4731
processing... 4494
Issue: 4494  linked from PR: 4732
processing... 4494
Issue: 4494  repeated: 47

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


4954
Issue: 4954  linked from PR: 4955
processing... 49
Issue: 49  was in fact an PR! pr base: 4960
processing... 4490
Issue: 4490  linked from PR: 4962
processing... 4886
Issue: 4886  linked from PR: 4964
processing... 49
Issue: 49  was in fact an PR! pr base: 4964
processing... 4913
Issue: 4913  linked from PR: 4965
processing... 49
Issue: 49  was in fact an PR! pr base: 4965
processing... 482
Issue: 482  linked from PR: 4966
processing... 4827
Issue: 4827  linked from PR: 4966
processing... 4968
Issue: 4968  linked from PR: 4970
processing... 497
Issue: 497  linked from PR: 4975
processing... 3599
Issue: 3599  linked from PR: 4975
processing... 4971
Issue: 4971  linked from PR: 4975
processing... 2016
Issue: 2016  linked from PR: 4979
processing... 4977
Issue: 4977  linked from PR: 4980
processing... 496
Issue: 496  linked from PR: 4981
processing... 688
Issue: 688  was in fact an PR! pr base: 4989
processing... 818
Issue: 818  linked from PR: 4989
processing... 812
Issue: 812  was 

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


Issue: 4629  repeated: 5150
processing... 6910
Issue: 6910  was in fact an PR! pr base: 5153
processing... 6805
Issue: 6805  was in fact an PR! pr base: 5153
processing... 5008
Issue: 5008  was in fact an PR! pr base: 5153
processing... 3383
Issue: 3383  linked from PR: 5153
processing... 188
Issue: 188  linked from PR: 5157
processing... 8203
Issue: 8203  was in fact an PR! pr base: 5157
processing... 195
Issue: 195  was in fact an PR! pr base: 5157
processing... 8203
Issue: 8203  was in fact an PR! pr base: 5157
processing... 173
Issue: 173  linked from PR: 5157
processing... 8203
Issue: 8203  was in fact an PR! pr base: 5157
processing... 204
Issue: 204  was in fact an PR! pr base: 5157
processing... 8203
Issue: 8203  was in fact an PR! pr base: 5157
processing... 205
Issue: 205  was in fact an PR! pr base: 5157
processing... 8203
Issue: 8203  was in fact an PR! pr base: 5157
processing... 181
Issue: 181  linked from PR: 5157
processing... 174
Issue: 174  linked from PR: 5157
proces

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


 5381
processing... 49
Issue: 49  was in fact an PR! pr base: 5381
processing... 5369
Issue: 5369  linked from PR: 5382
processing... 5369
Issue: 5369  repeated: 5382
processing... 5369
Issue: 5369  repeated: 5382
processing... 49
Issue: 49  was in fact an PR! pr base: 5382
processing... 49
Issue: 49  was in fact an PR! pr base: 5385
processing... 5246
Issue: 5246  linked from PR: 5385
processing... 5226
Issue: 5226  was in fact an PR! pr base: 5388
processing... 49
Issue: 49  was in fact an PR! pr base: 5388
processing... 49
Issue: 49  was in fact an PR! pr base: 5389
processing... 49
Issue: 49  was in fact an PR! pr base: 5390
processing... 49
Issue: 49  was in fact an PR! pr base: 5391
processing... 5359
Issue: 5359  linked from PR: 5392
processing... 5364
Issue: 5364  was in fact an PR! pr base: 5393
processing... 49
Issue: 49  was in fact an PR! pr base: 5394
processing... 5169
Issue: 5169  linked from PR: 5395
processing... 49
Issue: 49  was in fact an PR! pr base: 5395
processin

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :



processing... 864
Issue: 864  was in fact an PR! pr base: 5693
processing... 868
Issue: 868  was in fact an PR! pr base: 5693
processing... 826
Issue: 826  was in fact an PR! pr base: 5693
processing... 845
Issue: 845  linked from PR: 5693
processing... 845
Issue: 845  repeated: 5693
processing... 862
Issue: 862  linked from PR: 5693
processing... 874
Issue: 874  was in fact an PR! pr base: 5693
processing... 872
Issue: 872  linked from PR: 5693
processing... 738
Issue: 738  linked from PR: 5693
processing... 877
Issue: 877  was in fact an PR! pr base: 5693
processing... 875
Issue: 875  was in fact an PR! pr base: 5693
processing... 876
Issue: 876  was in fact an PR! pr base: 5693
processing... 875
Issue: 875  was in fact an PR! pr base: 5693
processing... 873
Issue: 873  was in fact an PR! pr base: 5693
processing... 872
Issue: 872  linked from PR: 5693
processing... 870
Issue: 870  was in fact an PR! pr base: 5693
processing... 871
Issue: 871  linked from PR: 5693
processing... 850


  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


 6005
processing... 1868
Issue: 1868  was in fact an PR! pr base: 6005
processing... 1867
Issue: 1867  was in fact an PR! pr base: 6005
processing... 1863
Issue: 1863  was in fact an PR! pr base: 6005
processing... 336
Issue: 336  was in fact an PR! pr base: 6006
processing... 338
Issue: 338  was in fact an PR! pr base: 6006
processing... 337
Issue: 337  linked from PR: 6006
processing... 336
Issue: 336  was in fact an PR! pr base: 6006
processing... 881
Issue: 881  was in fact an PR! pr base: 6007
processing... 894
Issue: 894  linked from PR: 6007
processing... 895
Issue: 895  linked from PR: 6007
processing... 896
Issue: 896  linked from PR: 6007
processing... 897
Issue: 897  linked from PR: 6007
processing... 899
Issue: 899  was in fact an PR! pr base: 6007
processing... 900
Issue: 900  was in fact an PR! pr base: 6007
processing... 899
Issue: 899  was in fact an PR! pr base: 6007
processing... 882
Issue: 882  linked from PR: 6007
processing... 887
Issue: 887  was in fact an PR! pr 

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


 6372
processing... 6357
Issue: 6357  linked from PR: 6377
processing... 6091
Issue: 6091  linked from PR: 6379
processing... 5662
Issue: 5662  linked from PR: 6381
processing... 6303
Issue: 6303  linked from PR: 6386
processing... 6383
Issue: 6383  linked from PR: 6389
processing... 6396
Issue: 6396  linked from PR: 6398
processing... 6430
Issue: 6430  linked from PR: 6402
processing... 6403
Issue: 6403  linked from PR: 6406
processing... 6297
Issue: 6297  linked from PR: 6407
processing... 842
Issue: 842  was in fact an PR! pr base: 6408
processing... 407
Issue: 407  was in fact an PR! pr base: 6412
processing... 407
Issue: 407  was in fact an PR! pr base: 6413
processing... 421
Issue: 421  linked from PR: 6414
processing... 423
Issue: 423  linked from PR: 6414
processing... 425
Issue: 425  linked from PR: 6414
processing... 425
Issue: 425  repeated: 6414
processing... 423
Issue: 423  linked from PR: 6414
processing... 424
Issue: 424  was in fact an PR! pr base: 6414
processing... 42

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


1855
Issue: 1855  was in fact an PR! pr base: 6712
processing... 939
Issue: 939  linked from PR: 6712
processing... 1974
Issue: 1974  linked from PR: 6712
processing... 1974
Issue: 1974  repeated: 6712
processing... 1855
Issue: 1855  was in fact an PR! pr base: 6712
processing... 1855
Issue: 1855  was in fact an PR! pr base: 6712
processing... 1978
Issue: 1978  was in fact an PR! pr base: 6712
processing... 1978
Issue: 1978  was in fact an PR! pr base: 6712
processing... 1977
Issue: 1977  was in fact an PR! pr base: 6712
processing... 1977
Issue: 1977  was in fact an PR! pr base: 6712
processing... 1967
Issue: 1967  was in fact an PR! pr base: 6712
processing... 1968
Issue: 1968  was in fact an PR! pr base: 6712
processing... 1968
Issue: 1968  was in fact an PR! pr base: 6712
processing... 1967
Issue: 1967  was in fact an PR! pr base: 6712
processing... 1967
Issue: 1967  was in fact an PR! pr base: 6712
processing... 1973
Issue: 1973  linked from PR: 6712
processing... 1973
Issue: 1973

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


Issue: 2053  was in fact an PR! pr base: 6950
processing... 2050
Issue: 2050  linked from PR: 6950
processing... 2050
Issue: 2050  repeated: 6950
processing... 2053
Issue: 2053  was in fact an PR! pr base: 6950
processing... 2050
Issue: 2050  linked from PR: 6950
processing... 98
Issue: 98  linked from PR: 6954
processing... 93
Issue: 93  was in fact an PR! pr base: 6954
processing... 93
Issue: 93  was in fact an PR! pr base: 6954
processing... 368
Issue: 368  was in fact an PR! pr base: 6956
processing... 370
Issue: 370  was in fact an PR! pr base: 6956
processing... 265
Issue: 265  was in fact an PR! pr base: 6956
processing... 305
Issue: 305  was in fact an PR! pr base: 6956
processing... 305
Issue: 305  was in fact an PR! pr base: 6956
processing... 6867
Issue: 6867  linked from PR: 6960
processing... 6964
Issue: 6964  linked from PR: 6965
processing... 6369
Issue: 6369  linked from PR: 6968
processing... 109
Issue: 109  linked from PR: 6971
processing... 109
Issue: 109  repeated: 

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


  linked from PR: 7191
processing... 6920
Issue: 6920  was in fact an PR! pr base: 7191
processing... 7203
Issue: 7203  linked from PR: 7207
processing... 3925
Issue: 3925  linked from PR: 7208
processing... 7205
Issue: 7205  linked from PR: 7209
processing... 7206
Issue: 7206  linked from PR: 7209
processing... 6942
Issue: 6942  linked from PR: 7210
processing... 7199
Issue: 7199  linked from PR: 7210
processing... 6942
Issue: 6942  linked from PR: 7210
processing... 7199
Issue: 7199  linked from PR: 7210
processing... 6942
Issue: 6942  linked from PR: 7210
processing... 7199
Issue: 7199  linked from PR: 7210
processing... 6706
Issue: 6706  was in fact an PR! pr base: 7210
processing... 7199
Issue: 7199  linked from PR: 7210
processing... 6942
Issue: 6942  linked from PR: 7210
processing... 6706
Issue: 6706  was in fact an PR! pr base: 7210
processing... 7199
Issue: 7199  linked from PR: 7210
processing... 402
Issue: 402  linked from PR: 7211
processing... 402
Issue: 402  repeated: 72

Issue: 57  was in fact an PR! pr base: 7446
processing... 57
Issue: 57  was in fact an PR! pr base: 7446
processing... 55
Issue: 55  was in fact an PR! pr base: 7446
processing... 177
Issue: 177  linked from PR: 7447
processing... 7454
Issue: 7454  linked from PR: 7455
processing... 3791
Issue: 3791  linked from PR: 7458
processing... 1903
Issue: 1903  linked from PR: 7460
processing... 1905
Issue: 1905  was in fact an PR! pr base: 7460
processing... 2012
Issue: 2012  linked from PR: 7460
processing... 2012
Issue: 2012  repeated: 7460
processing... 2044
Issue: 2044  was in fact an PR! pr base: 7460
processing... 2040
Issue: 2040  was in fact an PR! pr base: 7460
processing... 2040
Issue: 2040  was in fact an PR! pr base: 7460
processing... 2052
Issue: 2052  linked from PR: 7460
processing... 2052
Issue: 2052  repeated: 7460
processing... 2049
Issue: 2049  linked from PR: 7460
processing... 2049
Issue: 2049  repeated: 7460
processing... 2069
Issue: 2069  was in fact an PR! pr base: 7460

  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :



processing... 262
Issue: 262  linked from PR: 7645
processing... 295
Issue: 295  linked from PR: 7646
processing... 551
Issue: 551  linked from PR: 7646
processing... 553
Issue: 553  was in fact an PR! pr base: 7646
processing... 572
Issue: 572  was in fact an PR! pr base: 7646
processing... 572
Issue: 572  was in fact an PR! pr base: 7646
processing... 571
Issue: 571  linked from PR: 7646
processing... 6207
Issue: 6207  linked from PR: 7649
processing... 7633
Issue: 7633  linked from PR: 7652
processing... 7195
Issue: 7195  linked from PR: 7655
processing... 7195
Issue: 7195  repeated: 7655
processing... 7016
Issue: 7016  linked from PR: 7656
processing... 7016
Issue: 7016  repeated: 7656
processing... 7616
Issue: 7616  linked from PR: 7657
processing... 7343
Issue: 7343  linked from PR: 7659
processing... 7660
Issue: 7660  linked from PR: 7663
processing... 7660
Issue: 7660  repeated: 7663
processing... 7660
Issue: 7660  repeated: 7663
processing... 7660
Issue: 7660  repeated: 7663


  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :
  if (ispr==0) :


In [24]:
#testPRs

In [25]:
#i=0
#while i < len(testPRs):
#for issue in globalIssues: 
    
    #print ("coverting to test...",pr)
    #pr = testPRs[i]
    #ispr = prIssues.loc[prIssues['PR_Number'] == issue, 'isPR'].values
    #isTrain = prIssues.loc[prIssues['PR_Number'] == int(pr), 'isTrain'].values
    #print(ispr)
    #df.query('PR_Number==3')['A']
    #if (isTrain==1) :
        #prIssues.loc[prIssues['PR_Number'] == int(pr), 'isTrain'].values = [0]
        #print ("PR test", pr)
        
    #i=i+1

In [26]:
#prIssues.loc[prIssues['isTrain'] == 0, 'PR_Number'].values

In [27]:
type(ispr)

numpy.ndarray

In [28]:
type(issue)

str

In [29]:
ispr


array([0])

In [30]:
ispr = prIssues.loc[prIssues['Issue_Number'] == 959, 'isPR'].values[0]

In [31]:
ispr


0

In [32]:
prIssues.loc[prIssues['Issue_Number'] == 959]

Unnamed: 0,Issue_Number,Issue_Title,Issue_Author_Name,Issue_Author_Login,Issue_Closed_Date,Issue_Body,Issue_Comments,PR_Title,PR_Author_Name,PR_Author_Login,PR_Closed_Date,PR_Body,PR_Comments,Commit_Author_Name,Commit_Date,Commit_Message,isPR
956,959,"Cleanup entries: Rely on ""field formatter""",Oliver Kopp,koppor,"05/12/16, 08:02:09 PM","The first two cleanups can be achieved using the ""field formatters"".\n- [ ] ""Run Unico...",All the points are already fixed in the latest master version.\n =||=,,,,,,,,,,0


In [33]:
issue

'7786'

In [34]:
int(issue)

7786

In [35]:
issuetitle[0]

'Library import: no "select all" option'

In [36]:
issuebody[0]

'JabRef 5.2--2020-12-24--6a2a512\r\nWindows 10 10.0 amd64 \r\nJava 14.0.2\r\n\r\nWhen importing another library into the current one, a dialog opens in which you have to select entries.\r\nThere is a "select new entries", which is not reliable (I tried with existing entries that had been modified: the software does not see them as new), and a "unselect all".\r\n\r\nWhy not add a "select all"?\r\n\r\nCtrl-click and shift-click do not even provide this functionality.\r\n\r\nMy production rate has definitly lowered after my third bug report this day'

In [37]:
issuecomments[0]

'Thanks for your suggestion. Can you please elaborate on why you want to import duplicate entries? If you later want to merge them with an existing entry, this is already possible in the import dialog: you have a small icon next to the duplicated item that lets you manage the import / merge strategy for this entry. =||= I have the same issiue on Windows 10. I cannot import PDF files at once. Drag and Drop doenst work.. How to add multiple PDF files to library ?\r\n\r\nEdit: Drag and Drop work on version 5.3 =||= Thanks to @brapana  this issue is now resolved int he latest main!  =||= '

In [38]:
newFields = pd.read_csv( new_fields_csv, header = 0, sep=",")

In [39]:
newFields3 = pd.read_csv( new_fields3_csv, header = 0, sep=",")

In [40]:
newFields3.head(10)

Unnamed: 0,PR,Issue,Issue_Title_Linked,Issue_Body_Linked,Issue_Comment_Linked,isTrain
0,11.0,880,OO/LO: Jabref shoud respect MaxAuthors/MaxFirstAuthors in Bibliography,"Jab Ref 3.2 Development version, Libre Office 5.0.2 x64 on Windows 7x63\n\nWhen I inse...",Are those lengths always the same or should one add another parameter for the bibliogr...,0.0
1,12.0,959,"Cleanup entries: Rely on ""field formatter""","The first two cleanups can be achieved using the ""field formatters"".\n- [ ] ""Run Unico...",All the points are already fixed in the latest master version.\n =||=,0.0
2,14.0,960,Remove trailing ; at metadata,"While reading test cases of the metadata serialization, I saw that the final `;` seems...","No objections, but we still need to be able to parse the old format. But we can write ...",0.0
3,17.0,850,Remove private fields (and other field flags),Private fields (fields marked with `net.sf.jabref.gui.InternalBibtexFields.BibtexSingl...,"@JabRef/developers Are these fields removed? I cannot find anything anymore, when sear...",0.0
4,26.0,958,Size of database properties dialog,The size of the database properties dialog is not fixed. When opening the dialog the f...,I did not notice a problem on Win7. The only thing is that the space between the last ...,0.0
5,28.0,762,[WIP] Replaced BibEntry[] with List<BibEntry>,Replaced `BibEntry[]` with `List<BibEntry>` as return from `MainTable.getSelectedEntri...,More or less the same changes as in #718 :crying_cat_face: \n =||= Oh no...\n =||= Wai...,0.0
6,28.0,685,Can't export to MySQL 5.5 in JabRef 3.2,"I'm using Jabref 3.2 and MySQL 5.5, but now I can't export my .bib to SQL database. It...","Is it possible to post a [minimal, complete, and verifiable example](https://en.wikipe...",0.0
7,36.0,1253,NullPointerException on editing bibtex file,JabRef version 2.10 on gentoo Linux\n\nSteps to reproduce: This recently started happe...,"Would you please test with the actual version 3.3?\n =||= Hi There,\n\nGentoo doesn't ...",0.0
8,38.0,919,Integrate journal abbreviations into JabRef,"Currently, the repository https://github.com/JabRef/reference-abbreviations contains s...","As a big fan of using strings for journal abbreviations, I also believe that it would ...",0.0
9,137.0,136,Extract and unify all GUI actions,The question is how can we reduce the cluttering caused by those inline classes.\n\nTh...,> The question is how can we reduce the cluttering caused by those inline classes.\n\n...,0.0


In [41]:
newFields3.columns = ['PR', 'Issue', 'Issue_Title_Linked', 'Issue_Body_Linked', 'Issue_Comment_Linked', 'isTrain']

In [42]:
newFields3.head(10)

Unnamed: 0,PR,Issue,Issue_Title_Linked,Issue_Body_Linked,Issue_Comment_Linked,isTrain
0,11.0,880,OO/LO: Jabref shoud respect MaxAuthors/MaxFirstAuthors in Bibliography,"Jab Ref 3.2 Development version, Libre Office 5.0.2 x64 on Windows 7x63\n\nWhen I inse...",Are those lengths always the same or should one add another parameter for the bibliogr...,0.0
1,12.0,959,"Cleanup entries: Rely on ""field formatter""","The first two cleanups can be achieved using the ""field formatters"".\n- [ ] ""Run Unico...",All the points are already fixed in the latest master version.\n =||=,0.0
2,14.0,960,Remove trailing ; at metadata,"While reading test cases of the metadata serialization, I saw that the final `;` seems...","No objections, but we still need to be able to parse the old format. But we can write ...",0.0
3,17.0,850,Remove private fields (and other field flags),Private fields (fields marked with `net.sf.jabref.gui.InternalBibtexFields.BibtexSingl...,"@JabRef/developers Are these fields removed? I cannot find anything anymore, when sear...",0.0
4,26.0,958,Size of database properties dialog,The size of the database properties dialog is not fixed. When opening the dialog the f...,I did not notice a problem on Win7. The only thing is that the space between the last ...,0.0
5,28.0,762,[WIP] Replaced BibEntry[] with List<BibEntry>,Replaced `BibEntry[]` with `List<BibEntry>` as return from `MainTable.getSelectedEntri...,More or less the same changes as in #718 :crying_cat_face: \n =||= Oh no...\n =||= Wai...,0.0
6,28.0,685,Can't export to MySQL 5.5 in JabRef 3.2,"I'm using Jabref 3.2 and MySQL 5.5, but now I can't export my .bib to SQL database. It...","Is it possible to post a [minimal, complete, and verifiable example](https://en.wikipe...",0.0
7,36.0,1253,NullPointerException on editing bibtex file,JabRef version 2.10 on gentoo Linux\n\nSteps to reproduce: This recently started happe...,"Would you please test with the actual version 3.3?\n =||= Hi There,\n\nGentoo doesn't ...",0.0
8,38.0,919,Integrate journal abbreviations into JabRef,"Currently, the repository https://github.com/JabRef/reference-abbreviations contains s...","As a big fan of using strings for journal abbreviations, I also believe that it would ...",0.0
9,137.0,136,Extract and unify all GUI actions,The question is how can we reduce the cluttering caused by those inline classes.\n\nTh...,> The question is how can we reduce the cluttering caused by those inline classes.\n\n...,0.0


In [43]:
newFields.head(10)

Unnamed: 0,0
0,"PR, Issue, Issue_Title_Linked, Issue_Body_Linked, Issue_Comment_Linked, isTrain"
1,"11,880,OO/LO: Jabref shoud respect MaxAuthors/MaxFirstAuthors in Bibliography,Jab Ref ..."
2,"12,959,Cleanup entries: Rely on ""field formatter"",The first two cleanups can be achiev..."
3,"14,960,Remove trailing ; at metadata,While reading test cases of the metadata serializ..."
4,"17,850,Remove private fields (and other field flags),Private fields (fields marked wit..."
5,"26,958,Size of database properties dialog,The size of the database properties dialog i..."
6,"28,762,[WIP] Replaced BibEntry[] with List<BibEntry>,Replaced `BibEntry[]` with `List<..."
7,"28,685,Can't export to MySQL 5.5 in JabRef 3.2,I'm using Jabref 3.2 and MySQL 5.5, but..."
8,"36,1253,NullPointerException on editing bibtex file,JabRef version 2.10 on gentoo Linu..."
9,"38,919,Integrate journal abbreviations into JabRef,Currently, the repository https://g..."


In [44]:
df_merge = pd.merge(prIssues, newFields3, left_on='Issue_Number', right_on='PR', how='left')

In [45]:
df_merge.head(15)

Unnamed: 0,Issue_Number,Issue_Title,Issue_Author_Name,Issue_Author_Login,Issue_Closed_Date,Issue_Body,Issue_Comments,PR_Title,PR_Author_Name,PR_Author_Login,...,Commit_Author_Name,Commit_Date,Commit_Message,isPR,PR,Issue,Issue_Title_Linked,Issue_Body_Linked,Issue_Comment_Linked,isTrain
0,1,New Sorting/Export preferences,Olaf Lenz,olenz,"03/12/14, 11:38:01 AM","This will add a new ""File Sorting"" Tab to the preferences that allows to exactly speci...",,New Sorting/Export preferences,Olaf Lenz,olenz,...,Olaf Lenz,"03/12/14, 09:16:45 AM",Merge branch 'sorting',1,,,,,,
1,2,Basic gradle integration,Simon Harrer,simonharrer,"03/12/14, 06:29:22 PM",This adds basic gradle integration. The project files for Intellij and Eclipse can be ...,,Basic gradle integration,Simon Harrer,simonharrer,...,Simon Harrer,"03/12/14, 05:20:08 PM",Gradle build works basically. Reuses existing folder structure.,1,,,,,,
2,3,Some example new Dutch translations via the GitHub web page.,Egon Willighagen,egonw,"03/15/14, 12:55:32 PM",,,Some example new Dutch translations via the GitHub web page.,Egon Willighagen,egonw,...,Egon Willighagen,"03/15/14, 09:33:10 AM",Some example new Dutch translations via the GitHub web page.,1,,,,,,
3,4,Spanish translation update,Jorge Tornero,jtornero,"03/17/14, 10:20:50 PM",Three new strings translated.,"Even though the other translation files have been updated, I accept this pull request....",Spanish translation update,Jorge Tornero,jtornero,...,jtornero,"03/17/14, 09:28:26 PM",Spanish translation update,1,,,,,,
4,5,Update JabRef_in.properties,,was123,"03/18/14, 05:12:53 AM",Indonesian translation added,,Update JabRef_in.properties,,was123,...,was123,"03/18/14, 12:01:58 AM",Update JabRef_in.properties\n\nIndonesian translation added,1,,,,,,
5,6,Fixed splash-dev.svg,Olaf Lenz,olenz,"03/19/14, 07:46:25 PM",,,Fixed splash-dev.svg,Olaf Lenz,olenz,...,Olaf Lenz,"03/19/14, 07:41:29 PM",Fixed splash-dev.svg,1,,,,,,
6,7,I have implemented 2 features requested in Ticket #841.,,noravanq,"03/23/14, 04:03:12 PM","```\nThe user can now specify an arbitrary number of file-link columns in te ""Entry ta...",,I have implemented 2 features requested in Ticket #841.,,noravanq,...,noravanq,"03/23/14, 04:48:49 AM",I have implemented 2 features requested in Ticket #841.\n\n The user can now speci...,1,,,,,,
7,8,PDF-file metadata: Privacy Filtering all metadata,Adrian Daerr,adaerr,"04/23/14, 09:23:03 PM",This pull-request pertains to the addition of metadata to PDF files associated with en...,thx!\n =||=,PDF-file metadata: Privacy Filtering all metadata,Adrian Daerr,adaerr,...,Adrian Daerr,"04/22/14, 06:19:15 PM",Erase fields listed in XMP Privacy Settings from PDF DocumentInformation when XMP-tagging,1,,,,,,
8,9,Support FindFullText with ACS DOIs,Peter Ansell,ansell,"05/20/14, 12:53:43 PM",Adds a FullTextFinder implementation to transform the ACS DOI redirect URLs to their P...,,Support FindFullText with ACS DOIs,Peter Ansell,ansell,...,Peter Ansell,"05/07/14, 01:02:09 AM",add CSIRO copyright 2014,1,,,,,,
9,10,try to fix some obvious bugs about `groups`,,braindevices,"05/20/14, 12:55:20 PM","Hi All,\nI just upgraded to 2.10 and I found some bugs immediately.\n1. in the right-c...",After 2.10 the BibtexEntryType.java is changed a lot. The optional fields and required...,try to fix some obvious bugs about `groups`,,braindevices,...,Ling Wang,"05/20/14, 02:56:41 AM",fix bug causing double entries.,1,,,,,,


In [46]:
df_merge['isTrain'] = df_merge['isTrain'].replace(np.nan, 1)

In [47]:
df_merge.head()

Unnamed: 0,Issue_Number,Issue_Title,Issue_Author_Name,Issue_Author_Login,Issue_Closed_Date,Issue_Body,Issue_Comments,PR_Title,PR_Author_Name,PR_Author_Login,...,Commit_Author_Name,Commit_Date,Commit_Message,isPR,PR,Issue,Issue_Title_Linked,Issue_Body_Linked,Issue_Comment_Linked,isTrain
0,1,New Sorting/Export preferences,Olaf Lenz,olenz,"03/12/14, 11:38:01 AM","This will add a new ""File Sorting"" Tab to the preferences that allows to exactly speci...",,New Sorting/Export preferences,Olaf Lenz,olenz,...,Olaf Lenz,"03/12/14, 09:16:45 AM",Merge branch 'sorting',1,,,,,,1.0
1,2,Basic gradle integration,Simon Harrer,simonharrer,"03/12/14, 06:29:22 PM",This adds basic gradle integration. The project files for Intellij and Eclipse can be ...,,Basic gradle integration,Simon Harrer,simonharrer,...,Simon Harrer,"03/12/14, 05:20:08 PM",Gradle build works basically. Reuses existing folder structure.,1,,,,,,1.0
2,3,Some example new Dutch translations via the GitHub web page.,Egon Willighagen,egonw,"03/15/14, 12:55:32 PM",,,Some example new Dutch translations via the GitHub web page.,Egon Willighagen,egonw,...,Egon Willighagen,"03/15/14, 09:33:10 AM",Some example new Dutch translations via the GitHub web page.,1,,,,,,1.0
3,4,Spanish translation update,Jorge Tornero,jtornero,"03/17/14, 10:20:50 PM",Three new strings translated.,"Even though the other translation files have been updated, I accept this pull request....",Spanish translation update,Jorge Tornero,jtornero,...,jtornero,"03/17/14, 09:28:26 PM",Spanish translation update,1,,,,,,1.0
4,5,Update JabRef_in.properties,,was123,"03/18/14, 05:12:53 AM",Indonesian translation added,,Update JabRef_in.properties,,was123,...,was123,"03/18/14, 12:01:58 AM",Update JabRef_in.properties\n\nIndonesian translation added,1,,,,,,1.0


In [48]:
df_merge.to_csv( merge_output_filename, encoding='utf-8', header=True, index=False, sep=',') 

In [49]:
df_merge.to_csv( mergeNA_output_filename, encoding='utf-8', header=True, index=False, na_rep='NA', sep=',') 

In [50]:
newFields3.loc[newFields3['PR'] == 4113]

Unnamed: 0,PR,Issue,Issue_Title_Linked,Issue_Body_Linked,Issue_Comment_Linked,isTrain


In [51]:
df_merge.loc[df_merge['Issue_Number'] == 2300]


Unnamed: 0,Issue_Number,Issue_Title,Issue_Author_Name,Issue_Author_Login,Issue_Closed_Date,Issue_Body,Issue_Comments,PR_Title,PR_Author_Name,PR_Author_Login,...,Commit_Author_Name,Commit_Date,Commit_Message,isPR,PR,Issue,Issue_Title_Linked,Issue_Body_Linked,Issue_Comment_Linked,isTrain
2353,2300,Fix local metadata synchronization (fixes: #2219),Admir Obralija,obraliar,"11/20/16, 06:19:25 PM",Issue: https://github.com/JabRef/jabref/issues/2219.\r\n\r\nThis issue is fixed by ove...,"I merged it, because it is a hotfix. @obraliar Do you think, it is possible to add a t...",Fix local metadata synchronization (fixes: #2219),Admir Obralija,obraliar,...,Admir Obralija,"11/20/16, 05:21:07 PM",Fix local metadata synchronization\n\n- Overload the parse method in MetaDataParser\n-...,1,2300.0,2219,Metadata lost on reconnect to a shared database,JabRef 3.7dev--snapshot--2016-10-24--master--7d6ee2b\r\nlinux 3.13.0-100-generic i386 ...,What is the status of this issue? @tobiasdiez @obraliar \n =||= I'm unfortunately not ...,0.0


In [54]:
prIssues.loc[prIssues['Issue_Number'] == 1, 'PR_Body']


0    This will add a new "File Sorting" Tab to the preferences that allows to exactly speci...
Name: PR_Body, dtype: object

In [55]:
value = df_merge['PR_Title'].str.replace(",",'')
df_merge['PR_Title'] = value

value = df_merge['PR_Body'].str.replace(",",'')
df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace(",",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace(",",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace(",",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace(",",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace(",",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace(",",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace(",",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace(",",'')
df_merge['Issue_Comment_Linked'] = value


AttributeError: Can only use .str accessor with string values!

In [82]:
value = df_merge['PR_Title'].str.replace("\n",'')
df_merge['PR_Title'] = value

value = df_merge['PR_Body'].str.replace("\n",'')
df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace("\n",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace("\n",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace("\n",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace("\n",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace("\n",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace("\n",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace("\n",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace("\n",'')
df_merge['Issue_Comment_Linked'] = value


AttributeError: Can only use .str accessor with string values!

In [None]:
value = df_merge['PR_Title'].str.replace("'",'')
df_merge['PR_Title'] = value

value = df_merge['PR_Body'].str.replace("'",'')
df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace("'",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace("'",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace("'",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace("'",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace("'",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace("'",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace("'",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace("'",'')
df_merge['Issue_Comment_Linked'] = value


In [None]:
#value = df_merge['PR_Title'].str.replace("\r",'')
#df_merge['PR_Title'] = value

#value = df_merge['PR_Body'].str.replace("\r",'')
#df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace("\r",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace("\r",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace("\r",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace("\r",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace("\r",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace("\r",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace("\r",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace("\r",'')
df_merge['Issue_Comment_Linked'] = value


In [None]:
#value = df_merge['PR_Title'].str.replace(".",'')
#df_merge['PR_Title'] = value

#value = df_merge['PR_Body'].str.replace(".",'')
#df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace(".",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace(".",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace(".",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace(".",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace(".",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace(".",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace(".",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace(".",'')
df_merge['Issue_Comment_Linked'] = value


In [None]:
#value = df_merge['PR_Title'].str.replace(";",'')
#df_merge['PR_Title'] = value

#value = df_merge['PR_Body'].str.replace(";",'')
#df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace(";",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace(";",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace(";",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace(";",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace(";",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace(";",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace(";",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace(";",'')
df_merge['Issue_Comment_Linked'] = value

In [None]:
#value = df_merge['PR_Title'].str.replace("=||=",'')
#df_merge['PR_Title'] = value

#value = df_merge['PR_Body'].str.replace("=||=",'')
#df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace("=||=",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace("=||=",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace("=||=",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace("=||=",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace("=||=",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace("=||=",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace("=||=",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace("=||=",'')
df_merge['Issue_Comment_Linked'] = value

In [None]:
#value = df_merge['PR_Title'].str.replace("|=|",'')
#df_merge['PR_Title'] = value

#value = df_merge['PR_Body'].str.replace("|=|",'')
#df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace("|=|",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace("|=|",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace("|=|",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace("|=|",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace("|=|",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace("|=|",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace("|=|",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace("|=|",'')
df_merge['Issue_Comment_Linked'] = value

In [None]:
#value = df_merge['PR_Title'].str.replace("||",'')
#df_merge['PR_Title'] = value

#value = df_merge['PR_Body'].str.replace("||",'')
#df_merge['PR_Body'] = value

value = df_merge['Issue_Title'].str.replace("||",'')
df_merge['Issue_Title'] = value

value = df_merge['Issue_Body'].str.replace("||",'')
df_merge['Issue_Body'] = value

value = df_merge['Issue_Comments'].str.replace("||",'')
df_merge['Issue_Comments'] = value

value = df_merge['PR_Comments'].str.replace("||",'')
df_merge['PR_Comments'] = value

value = df_merge['Commit_Message'].str.replace("||",'')
df_merge['Commit_Message'] = value

value = df_merge['Issue_Title_Linked'].str.replace("||",'')
df_merge['Issue_Title_Linked'] = value

value = df_merge['Issue_Body_Linked'].str.replace("||",'')
df_merge['Issue_Body_Linked'] = value

value = df_merge['Issue_Comment_Linked'].str.replace("||",'')
df_merge['Issue_Comment_Linked'] = value

In [None]:
#df_merge['PR_Body'] = value

In [None]:
pd.options.display.max_colwidth = 900

In [None]:
df_merge.loc[df_merge['Issue_Number'] == 110]

In [None]:
df_merge.head()

In [None]:
#del df_merge['PR_Body2']

In [None]:
del df_merge['Issue_Closed_Date']
del df_merge['Issue_Author']
del df_merge['PR_Closed_Date']
del df_merge['PR_Author']
del df_merge['PR_Title']
del df_merge['PR_Body']
del df_merge['Commit_Author']
del df_merge['Commit_Date']

In [None]:
df_merge.head()

In [None]:
df_merge.to_csv( mergeNA_output_filename, encoding='utf-8', header=True, index=False, na_rep='NA', sep=',') 

In [None]:
df_merge.to_csv( merge_output_filename, encoding='utf-8', header=True, index=False, sep=',') 