In [1]:
import os
import xml.etree.ElementTree as elemTree
import json

In [2]:
def get_br(gp, root=None):
    """List the bug-report entries stored for one project.

    Parameters
    ----------
    gp : str
        Name of the project sub-directory.
    root : str, optional
        Directory that contains the project sub-directories. When omitted,
        the notebook-level ``fpath`` variable is used, so ``fpath`` must be
        defined before the call.

    Returns
    -------
    list of str
        Names of the entries found in the project directory, sorted so the
        processing order is the same on every run (``os.listdir`` itself
        returns entries in arbitrary order).
    """
    if root is None:
        # Fall back to the dataset root configured at notebook level.
        root = fpath
    return sorted(os.listdir(os.path.join(root, gp)))

# BR info, SF (commit) info
def openxml(xmldir):
    """Parse one dataset XML file and return its two top-level children.

    The root element must have exactly two children; the first is returned
    as the bug element and the second as the commit element.
    """
    document_root = elemTree.parse(xmldir).getroot()
    assert len(document_root) == 2
    # Iterating an Element yields its children in document order.
    bug, commit = document_root
    return bug, commit

def getbugtext(bug):
    """Return the text of the bug's ``summary`` and ``description`` children.

    Either value is ``None`` when the corresponding element has no text.
    """
    summary_node = bug.find('summary')
    description_node = bug.find('description')
    return summary_node.text, description_node.text

def getbuginfo(bug):
    """Return ``(id, author, open_date, closed_time)`` read from the bug's attributes.

    All four values are returned as the raw attribute strings; a missing
    attribute raises ``KeyError``.
    """
    attrs = bug.attrib
    return attrs['id'], attrs['author'], attrs['open_date'], attrs['closed_time']

def getCtext(comment):
    """Return the text content of a comment element (``None`` if it has none)."""
    body = comment.text
    return body

def getCinfo(comment):
    """Return ``(id, author, date)`` read from a comment's attributes.

    The id is converted to ``int``; author and date stay as raw strings.
    """
    meta = comment.attrib
    return int(meta['id']), meta['author'], meta['date']

# return dictionary
def getCdict(bug):
    """Collect the bug's comments into a dict keyed by integer comment id.

    Each value is a dict with the keys ``'text'``, ``'author'`` and
    ``'date'``. The comments are read from the children of the bug's
    ``comments`` element.
    """
    records = {}
    for node in list(bug.find('comments')):
        Cid, Cauthor, Cdate = getCinfo(node)
        records[Cid] = {'text': getCtext(node),
                        'author': Cauthor,
                        'date': Cdate}
    return records


In [3]:
# commits
def get_dmm(commit):
    """Return the ``size``, ``complexity`` and ``interfacing`` attributes of
    the commit's ``dmm_unit`` child, in that order, as raw strings."""
    unit = commit.find('dmm_unit').attrib
    return unit['size'], unit['complexity'], unit['interfacing']

def get_commitinfo(commit):
    """Return the commit's identifying attributes and its ``dmm_unit`` metrics.

    Returns
    -------
    tuple
        ``(id, author, date, size, complexity, interfacing)``. The first
        three come from the commit element's own attributes, the last three
        from its ``dmm_unit`` child. All values are raw attribute strings.

    Raises
    ------
    KeyError
        If one of the attributes is missing.
    AttributeError
        If the commit has no ``dmm_unit`` child.
    """
    commitID = commit.attrib['id']
    committer = commit.attrib['author']
    commitDate = commit.attrib['date']
    # One lookup is enough; the element was previously fetched twice and one
    # of the results was never used.
    dmm = commit.find('dmm_unit').attrib
    return (commitID, committer, commitDate,
            dmm['size'], dmm['complexity'], dmm['interfacing'])
    
def get_files(commit):
    """Return the commit's direct children, skipping any ``dmm_unit`` element."""
    files = []
    for child in commit:
        if child.tag == 'dmm_unit':
            continue
        files.append(child)
    return files

#fixed files
def get_fileinfo(file):
    """Return the change metadata and metrics of one changed-file element.

    Returns
    -------
    tuple
        ``(change_type, old_name, new_name, nloc, complexity, token_count,
        n_methods)``. The first three come from the file element's
        attributes, the next three from its ``file_info`` child (all raw
        strings), and ``n_methods`` is the number of direct ``method``
        children.
    """
    attrs = file.attrib
    type_, Ofname, Nfname = attrs['change_type'], attrs['old_name'], attrs['new_name']
    metrics = file.find('file_info').attrib
    fNLOC, fCCN, fNTOKEN = metrics['nloc'], metrics['complexity'], metrics['token_count']
    # number of modified methods
    Nmm = sum(1 for child in file if child.tag == 'method')
    return type_, Ofname, Nfname, fNLOC, fCCN, fNTOKEN, Nmm

# changes that are not inside a method
def get_Nmethodinfo(file):
    """Return the added and deleted line text of a file's non-method change.

    The lines are read from the file's ``modified_lines`` child, which is
    where the export loops in this notebook read them from. If that child
    is absent, the file element itself is searched instead.

    Returns
    -------
    tuple
        ``(added_lines_text, deleted_lines_text)``; either value is ``None``
        when the corresponding element has no text.

    Raises
    ------
    AttributeError
        If ``added_lines`` or ``deleted_lines`` cannot be found.
    """
    container = file.find('modified_lines')
    if container is None:
        # Explicit None check: an Element without children is falsy.
        container = file
    aline = container.find('added_lines').text
    dline = container.find('deleted_lines').text
    return aline, dline

# method
def get_methodinfo(method):
    """Return ``(name, parameters, added_lines_text, deleted_lines_text)``
    for one method element.

    Name and parameters are attributes of the element; the line texts come
    from its ``added_lines`` and ``deleted_lines`` children.
    """
    attrs = method.attrib
    mName, mParams = attrs['name'], attrs['parameters']
    added_node = method.find('added_lines')
    deleted_node = method.find('deleted_lines')
    return mName, mParams, added_node.text, deleted_node.text

def get_methodattribs(method):
    """Return the metrics stored on a method's ``method_info`` child.

    Returns
    -------
    tuple
        ``(nloc, complexity, token_count, nesting_level, start_line,
        end_line)``, all as raw attribute strings.
    """
    info = method.find('method_info').attrib
    return (info['nloc'],
            info['complexity'],
            info['token_count'],
            info['nesting_level'],
            info['start_line'],
            info['end_line'])

In [4]:
# Dataset root: one sub-directory per project, each listed via get_br().
fpath = "./Generator/data/final_dataset/full/"
gps = os.listdir(fpath)
print(f"{len(gps)} projects")

# Sanity check: list the bug reports of the first project.
BRs = get_br(gps[0])
print(f"{len(BRs)} {BRs[0]}")

207 projects
2 182.xml


In [11]:
# Complex version
# Export every bug report as one JSON file ("complex" variant: keeps the
# commit-, file- and method-level complexity metrics).
# Layout: {'BR': {..., 'comments': {...}}, 'commit': {..., 'changed_files': {...}}}
FULL_JSON_DIR = './JSonSet/fulljson'

for project_idx, GroupProject in enumerate(gps):
    BRs = get_br(GroupProject)
    print(f'[{project_idx}/{len(gps)}]', GroupProject, len(BRs))
    for BR in BRs:
        print(GroupProject, BR)

        xmldir = f"{fpath}/{GroupProject}/{BR}"
        bug, commit = openxml(xmldir)

        # --- bug report (BR) ---
        summary, description = getbugtext(bug)
        bid, author, openT, closeT = getbuginfo(bug)

        # Comments are keyed by their position in the XML, not by comment id.
        cdict = {}
        for i, C in enumerate(bug.find('comments')):
            Cid, Cauthor, Cdate = getCinfo(C)
            cdict[f'comments_{i}'] = {'comment_id': Cid,
                                      'comment_author': Cauthor,
                                      'commentT': Cdate,
                                      'comment_text': getCtext(C)}

        # --- commit ---
        commitID, committer, commitDate, cNLOC, cCCN, cNPRAM = get_commitinfo(commit)

        # per changed file
        fdict = {}
        for i, file in enumerate(get_files(commit)):
            type_, Ofname, Nfname, fNLOC, fCCN, fNTOKEN, Nmm = get_fileinfo(file)
            file_entry = {'file_change_type': type_,
                          'file_Nmethod': Nmm,
                          'file_old_name': Ofname,
                          'file_new_name': Nfname,
                          'file_complexity': {'file_NLOC': fNLOC,
                                              'file_CCN': fCCN,
                                              'file_NToken': fNTOKEN}}
            fdict[f'file_{i}'] = file_entry

            # Added, deleted and renamed files are exported without hunks.
            if type_ in ('ADD', 'DELETE', 'RENAME'):
                continue

            # Remaining change types: one hunk per modified method, or a
            # single file-level hunk when no method was touched.
            hunks = {}
            if Nmm == 0:
                modified_lines = file.find('modified_lines')
                hunks['hunk_0'] = {'Ismethod': 0,
                                   'added_lines': modified_lines.find('added_lines').text,
                                   'deleted_lines': modified_lines.find('deleted_lines').text}
            else:
                for k, method in enumerate(file.findall('method')):
                    mName, mParams, aline, dline = get_methodinfo(method)
                    nloc, complexity, toks, nlevel, sline, eline = get_methodattribs(method)
                    hunks[f'hunk_{k}'] = {
                        'Ismethod': 1,
                        'added_lines': aline,
                        'deleted_lines': dline,
                        'method_info': {'method_name': mName,
                                        'method_params': mParams,
                                        'method_startline': sline,
                                        'method_endline': eline,
                                        'method_complexity': {'method_NLOC': nloc,
                                                              'method_CCN': complexity,
                                                              'method_NToken': toks,
                                                              'method_nesting_level': nlevel}}}
            file_entry['hunks'] = hunks

        finaljson = {'BR': {'BR_id': bid,
                            'BR_author': author,
                            'BRopenT': openT,
                            'BRcloseT': closeT,
                            'BR_text': {'BRsummary': summary,
                                        'BRdescription': description},
                            'comments': cdict},
                     'commit': {'commit_id': commitID,
                                'commit_author': committer,
                                'commitT': commitDate,
                                'commit_complexity': {'commit_NLOC': cNLOC,
                                                      'commit_CCN': cCCN,
                                                      'commit_Nprams': cNPRAM},
                                'changed_files': fdict}}

        # One output directory per project; exist_ok avoids the separate
        # isdir() check.
        out_dir = os.path.join(FULL_JSON_DIR, GroupProject)
        os.makedirs(out_dir, exist_ok=True)
        jsonpath = os.path.join(out_dir, BR.replace('.xml', '.json'))
        with open(jsonpath, 'w') as jf:
            json.dump(finaljson, jf)


[0/207] albu+albumentations 2
albu+albumentations 182.xml
albu+albumentations 207.xml
[1/207] albumentations-team+albumentations 6
albumentations-team+albumentations 180.xml
albumentations-team+albumentations 186.xml
albumentations-team+albumentations 207.xml
albumentations-team+albumentations 213.xml
albumentations-team+albumentations 215.xml
albumentations-team+albumentations 760.xml
[2/207] alexemg+deeplabcut 1
alexemg+deeplabcut 1018.xml
[3/207] alibaba+pipcook 10
alibaba+pipcook 101.xml
alibaba+pipcook 319.xml
alibaba+pipcook 337.xml
alibaba+pipcook 398.xml
alibaba+pipcook 445.xml
alibaba+pipcook 493.xml
alibaba+pipcook 520.xml
alibaba+pipcook 561.xml
alibaba+pipcook 639.xml
alibaba+pipcook 99.xml
[4/207] allegroai+clearml 4
allegroai+clearml 123.xml
allegroai+clearml 253.xml
allegroai+clearml 258.xml
allegroai+clearml 273.xml
[5/207] allenai+allennlp 31
allenai+allennlp 292.xml
allenai+allennlp 3426.xml
allenai+allennlp 3465.xml
allenai+allennlp 4255.xml
allenai+allennlp 4281.xml

apache+incubator-mxnet 9890.xml
apache+incubator-mxnet 9920.xml
apache+incubator-mxnet 9976.xml
[8/207] apache+tvm 7
apache+tvm 1007.xml
apache+tvm 2207.xml
apache+tvm 2898.xml
apache+tvm 3197.xml
apache+tvm 3240.xml
apache+tvm 552.xml
apache+tvm 7090.xml
[9/207] apple+coremltools 12
apple+coremltools 1001.xml
apple+coremltools 166.xml
apple+coremltools 504.xml
apple+coremltools 516.xml
apple+coremltools 687.xml
apple+coremltools 804.xml
apple+coremltools 810.xml
apple+coremltools 823.xml
apple+coremltools 866.xml
apple+coremltools 904.xml
apple+coremltools 982.xml
apple+coremltools 986.xml
[10/207] apple+turicreate 195
apple+turicreate 1033.xml
apple+turicreate 1046.xml
apple+turicreate 1053.xml
apple+turicreate 1056.xml
apple+turicreate 1057.xml
apple+turicreate 1065.xml
apple+turicreate 1070.xml
apple+turicreate 1088.xml
apple+turicreate 1094.xml
apple+turicreate 1096.xml
apple+turicreate 114.xml
apple+turicreate 1140.xml
apple+turicreate 1143.xml
apple+turicreate 1156.xml
apple+tur

awslabs+autogluon 685.xml
awslabs+autogluon 811.xml
[17/207] awslabs+djl 6
awslabs+djl 165.xml
awslabs+djl 201.xml
awslabs+djl 371.xml
awslabs+djl 383.xml
awslabs+djl 440.xml
awslabs+djl 493.xml
[18/207] awslabs+gluon-ts 41
awslabs+gluon-ts 1029.xml
awslabs+gluon-ts 1030.xml
awslabs+gluon-ts 1056.xml
awslabs+gluon-ts 1069.xml
awslabs+gluon-ts 107.xml
awslabs+gluon-ts 1094.xml
awslabs+gluon-ts 1157.xml
awslabs+gluon-ts 160.xml
awslabs+gluon-ts 161.xml
awslabs+gluon-ts 234.xml
awslabs+gluon-ts 275.xml
awslabs+gluon-ts 283.xml
awslabs+gluon-ts 291.xml
awslabs+gluon-ts 295.xml
awslabs+gluon-ts 310.xml
awslabs+gluon-ts 313.xml
awslabs+gluon-ts 322.xml
awslabs+gluon-ts 337.xml
awslabs+gluon-ts 419.xml
awslabs+gluon-ts 443.xml
awslabs+gluon-ts 469.xml
awslabs+gluon-ts 535.xml
awslabs+gluon-ts 550.xml
awslabs+gluon-ts 563.xml
awslabs+gluon-ts 581.xml
awslabs+gluon-ts 583.xml
awslabs+gluon-ts 589.xml
awslabs+gluon-ts 62.xml
awslabs+gluon-ts 621.xml
awslabs+gluon-ts 623.xml
awslabs+gluon-ts 629.

deepforge-dev+deepforge 153.xml
deepforge-dev+deepforge 1534.xml
deepforge-dev+deepforge 1540.xml
deepforge-dev+deepforge 1542.xml
deepforge-dev+deepforge 1554.xml
deepforge-dev+deepforge 1563.xml
deepforge-dev+deepforge 1566.xml
deepforge-dev+deepforge 1567.xml
deepforge-dev+deepforge 1570.xml
deepforge-dev+deepforge 1573.xml
deepforge-dev+deepforge 1575.xml
deepforge-dev+deepforge 1583.xml
deepforge-dev+deepforge 1585.xml
deepforge-dev+deepforge 1587.xml
deepforge-dev+deepforge 1591.xml
deepforge-dev+deepforge 1594.xml
deepforge-dev+deepforge 160.xml
deepforge-dev+deepforge 1603.xml
deepforge-dev+deepforge 1620.xml
deepforge-dev+deepforge 1621.xml
deepforge-dev+deepforge 1624.xml
deepforge-dev+deepforge 1627.xml
deepforge-dev+deepforge 1629.xml
deepforge-dev+deepforge 1641.xml
deepforge-dev+deepforge 1645.xml
deepforge-dev+deepforge 1650.xml
deepforge-dev+deepforge 1654.xml
deepforge-dev+deepforge 1656.xml
deepforge-dev+deepforge 1662.xml
deepforge-dev+deepforge 1665.xml
deepforge-de

dmlc+dgl 1754.xml
dmlc+dgl 1837.xml
dmlc+dgl 2087.xml
dmlc+dgl 2098.xml
dmlc+dgl 2118.xml
dmlc+dgl 2128.xml
dmlc+dgl 2157.xml
dmlc+dgl 2161.xml
dmlc+dgl 2166.xml
dmlc+dgl 2175.xml
dmlc+dgl 2409.xml
dmlc+dgl 2424.xml
dmlc+dgl 2473.xml
dmlc+dgl 2483.xml
dmlc+dgl 2484.xml
dmlc+dgl 2500.xml
dmlc+dgl 2528.xml
dmlc+dgl 354.xml
dmlc+dgl 412.xml
dmlc+dgl 438.xml
dmlc+dgl 538.xml
dmlc+dgl 637.xml
dmlc+dgl 757.xml
dmlc+dgl 761.xml
[52/207] dmlc+gluon-nlp 30
dmlc+gluon-nlp 1015.xml
dmlc+gluon-nlp 1019.xml
dmlc+gluon-nlp 1034.xml
dmlc+gluon-nlp 1055.xml
dmlc+gluon-nlp 1060.xml
dmlc+gluon-nlp 1086.xml
dmlc+gluon-nlp 1102.xml
dmlc+gluon-nlp 1129.xml
dmlc+gluon-nlp 1141.xml
dmlc+gluon-nlp 1142.xml
dmlc+gluon-nlp 1217.xml
dmlc+gluon-nlp 1218.xml
dmlc+gluon-nlp 1294.xml
dmlc+gluon-nlp 1315.xml
dmlc+gluon-nlp 1321.xml
dmlc+gluon-nlp 1367.xml
dmlc+gluon-nlp 1369.xml
dmlc+gluon-nlp 1416.xml
dmlc+gluon-nlp 1455.xml
dmlc+gluon-nlp 205.xml
dmlc+gluon-nlp 264.xml
dmlc+gluon-nlp 349.xml
dmlc+gluon-nlp 480.xml


explosion+spacy 5356.xml
explosion+spacy 536.xml
explosion+spacy 539.xml
explosion+spacy 541.xml
explosion+spacy 5435.xml
explosion+spacy 5728.xml
explosion+spacy 5729.xml
explosion+spacy 578.xml
explosion+spacy 5799.xml
explosion+spacy 5838.xml
explosion+spacy 5840.xml
explosion+spacy 5853.xml
explosion+spacy 5918.xml
explosion+spacy 5961.xml
explosion+spacy 6014.xml
explosion+spacy 6060.xml
explosion+spacy 6129.xml
explosion+spacy 6148.xml
explosion+spacy 624.xml
explosion+spacy 6260.xml
explosion+spacy 631.xml
explosion+spacy 6318.xml
explosion+spacy 6340.xml
explosion+spacy 6350.xml
explosion+spacy 6373.xml
explosion+spacy 6402.xml
explosion+spacy 6405.xml
explosion+spacy 672.xml
explosion+spacy 6737.xml
explosion+spacy 6755.xml
explosion+spacy 683.xml
explosion+spacy 755.xml
explosion+spacy 766.xml
explosion+spacy 795.xml
explosion+spacy 898.xml
explosion+spacy 912.xml
explosion+spacy 991.xml
[58/207] explosion+spacy-transformers 2
explosion+spacy-transformers 193.xml
explosion+sp

intel-isl+open3d 935.xml
intel-isl+open3d 944.xml
intel-isl+open3d 950.xml
[80/207] intellabs+coach 2
intellabs+coach 101.xml
intellabs+coach 324.xml
[81/207] intellabs+distiller 8
intellabs+distiller 115.xml
intellabs+distiller 122.xml
intellabs+distiller 155.xml
intellabs+distiller 168.xml
intellabs+distiller 198.xml
intellabs+distiller 213.xml
intellabs+distiller 249.xml
intellabs+distiller 326.xml
[82/207] intellabs+nlp-architect 2
intellabs+nlp-architect 117.xml
intellabs+nlp-architect 92.xml
[83/207] jina-ai+jina 13
jina-ai+jina 1072.xml
jina-ai+jina 1167.xml
jina-ai+jina 1229.xml
jina-ai+jina 1248.xml
jina-ai+jina 1255.xml
jina-ai+jina 1330.xml
jina-ai+jina 1333.xml
jina-ai+jina 1386.xml
jina-ai+jina 1404.xml
jina-ai+jina 1449.xml
jina-ai+jina 1521.xml
jina-ai+jina 1545.xml
jina-ai+jina 686.xml
[84/207] jolibrain+deepdetect 13
jolibrain+deepdetect 105.xml
jolibrain+deepdetect 116.xml
jolibrain+deepdetect 133.xml
jolibrain+deepdetect 151.xml
jolibrain+deepdetect 157.xml
jolibrain

microsoft+computervision-recipes 181.xml
microsoft+computervision-recipes 195.xml
microsoft+computervision-recipes 221.xml
microsoft+computervision-recipes 478.xml
microsoft+computervision-recipes 495.xml
microsoft+computervision-recipes 498.xml
microsoft+computervision-recipes 72.xml
microsoft+computervision-recipes 99.xml
[104/207] microsoft+deepspeed 10
microsoft+deepspeed 118.xml
microsoft+deepspeed 120.xml
microsoft+deepspeed 129.xml
microsoft+deepspeed 157.xml
microsoft+deepspeed 193.xml
microsoft+deepspeed 428.xml
microsoft+deepspeed 518.xml
microsoft+deepspeed 68.xml
microsoft+deepspeed 76.xml
microsoft+deepspeed 99.xml
[105/207] microsoft+forecasting 1
microsoft+forecasting 219.xml
[106/207] microsoft+mmdnn 2
microsoft+mmdnn 809.xml
microsoft+mmdnn 847.xml
[107/207] microsoft+nlp-recipes 2
microsoft+nlp-recipes 183.xml
microsoft+nlp-recipes 502.xml
[108/207] microsoft+nni 49
microsoft+nni 1076.xml
microsoft+nni 1164.xml
microsoft+nni 1192.xml
microsoft+nni 1204.xml
microsoft+n

nvidia+digits 59.xml
nvidia+digits 609.xml
nvidia+digits 659.xml
nvidia+digits 736.xml
[130/207] nvidia+nemo 8
nvidia+nemo 1289.xml
nvidia+nemo 1554.xml
nvidia+nemo 1556.xml
nvidia+nemo 1564.xml
nvidia+nemo 1566.xml
nvidia+nemo 1572.xml
nvidia+nemo 1606.xml
nvidia+nemo 1634.xml
[131/207] nvidia+tensorrt 2
nvidia+tensorrt 267.xml
nvidia+tensorrt 296.xml
[132/207] olivia-ai+olivia 2
olivia-ai+olivia 104.xml
olivia-ai+olivia 47.xml
[133/207] oneapi-src+onednn 42
oneapi-src+onednn 123.xml
oneapi-src+onednn 138.xml
oneapi-src+onednn 146.xml
oneapi-src+onednn 149.xml
oneapi-src+onednn 174.xml
oneapi-src+onednn 175.xml
oneapi-src+onednn 184.xml
oneapi-src+onednn 196.xml
oneapi-src+onednn 214.xml
oneapi-src+onednn 236.xml
oneapi-src+onednn 237.xml
oneapi-src+onednn 245.xml
oneapi-src+onednn 272.xml
oneapi-src+onednn 291.xml
oneapi-src+onednn 316.xml
oneapi-src+onednn 320.xml
oneapi-src+onednn 352.xml
oneapi-src+onednn 365.xml
oneapi-src+onednn 389.xml
oneapi-src+onednn 432.xml
oneapi-src+onedn

opennmt+opennmt-tf 20.xml
opennmt+opennmt-tf 221.xml
opennmt+opennmt-tf 275.xml
opennmt+opennmt-tf 276.xml
opennmt+opennmt-tf 289.xml
opennmt+opennmt-tf 329.xml
opennmt+opennmt-tf 338.xml
opennmt+opennmt-tf 363.xml
opennmt+opennmt-tf 365.xml
opennmt+opennmt-tf 371.xml
opennmt+opennmt-tf 374.xml
opennmt+opennmt-tf 377.xml
opennmt+opennmt-tf 380.xml
opennmt+opennmt-tf 391.xml
opennmt+opennmt-tf 414.xml
opennmt+opennmt-tf 416.xml
opennmt+opennmt-tf 435.xml
opennmt+opennmt-tf 491.xml
opennmt+opennmt-tf 542.xml
opennmt+opennmt-tf 544.xml
opennmt+opennmt-tf 568.xml
opennmt+opennmt-tf 576.xml
opennmt+opennmt-tf 655.xml
opennmt+opennmt-tf 665.xml
opennmt+opennmt-tf 695.xml
opennmt+opennmt-tf 701.xml
opennmt+opennmt-tf 80.xml
[144/207] openvinotoolkit+cvat 48
openvinotoolkit+cvat 103.xml
openvinotoolkit+cvat 1390.xml
openvinotoolkit+cvat 1543.xml
openvinotoolkit+cvat 1545.xml
openvinotoolkit+cvat 1613.xml
openvinotoolkit+cvat 1614.xml
openvinotoolkit+cvat 1620.xml
openvinotoolkit+cvat 1642.xml


pytorch+fairseq 2681.xml
pytorch+fairseq 2695.xml
pytorch+fairseq 2705.xml
pytorch+fairseq 2724.xml
pytorch+fairseq 2744.xml
pytorch+fairseq 2756.xml
pytorch+fairseq 2761.xml
[156/207] pytorch+ignite 40
pytorch+ignite 1040.xml
pytorch+ignite 1071.xml
pytorch+ignite 1076.xml
pytorch+ignite 1087.xml
pytorch+ignite 1096.xml
pytorch+ignite 1110.xml
pytorch+ignite 1115.xml
pytorch+ignite 1130.xml
pytorch+ignite 1133.xml
pytorch+ignite 114.xml
pytorch+ignite 1153.xml
pytorch+ignite 1198.xml
pytorch+ignite 1249.xml
pytorch+ignite 1285.xml
pytorch+ignite 1408.xml
pytorch+ignite 1426.xml
pytorch+ignite 1446.xml
pytorch+ignite 1476.xml
pytorch+ignite 241.xml
pytorch+ignite 340.xml
pytorch+ignite 348.xml
pytorch+ignite 373.xml
pytorch+ignite 409.xml
pytorch+ignite 411.xml
pytorch+ignite 486.xml
pytorch+ignite 499.xml
pytorch+ignite 503.xml
pytorch+ignite 510.xml
pytorch+ignite 514.xml
pytorch+ignite 530.xml
pytorch+ignite 541.xml
pytorch+ignite 615.xml
pytorch+ignite 752.xml
pytorch+ignite 782.xm

pytorchlightning+pytorch-lightning 3019.xml
pytorchlightning+pytorch-lightning 3032.xml
pytorchlightning+pytorch-lightning 3035.xml
pytorchlightning+pytorch-lightning 3053.xml
pytorchlightning+pytorch-lightning 3097.xml
pytorchlightning+pytorch-lightning 3104.xml
pytorchlightning+pytorch-lightning 3111.xml
pytorchlightning+pytorch-lightning 3143.xml
pytorchlightning+pytorch-lightning 3144.xml
pytorchlightning+pytorch-lightning 3162.xml
pytorchlightning+pytorch-lightning 3168.xml
pytorchlightning+pytorch-lightning 3172.xml
pytorchlightning+pytorch-lightning 3185.xml
pytorchlightning+pytorch-lightning 3189.xml
pytorchlightning+pytorch-lightning 3199.xml
pytorchlightning+pytorch-lightning 3233.xml
pytorchlightning+pytorch-lightning 3253.xml
pytorchlightning+pytorch-lightning 3259.xml
pytorchlightning+pytorch-lightning 326.xml
pytorchlightning+pytorch-lightning 3260.xml
pytorchlightning+pytorch-lightning 3276.xml
pytorchlightning+pytorch-lightning 3280.xml
pytorchlightning+pytorch-lightnin

ray-project+ray 1410.xml
ray-project+ray 1446.xml
ray-project+ray 1462.xml
ray-project+ray 1534.xml
ray-project+ray 1587.xml
ray-project+ray 1925.xml
ray-project+ray 1969.xml
ray-project+ray 2027.xml
ray-project+ray 2519.xml
ray-project+ray 2608.xml
ray-project+ray 2614.xml
ray-project+ray 2647.xml
ray-project+ray 266.xml
ray-project+ray 2731.xml
ray-project+ray 2751.xml
ray-project+ray 2909.xml
ray-project+ray 2939.xml
ray-project+ray 2941.xml
ray-project+ray 2959.xml
ray-project+ray 3045.xml
ray-project+ray 3048.xml
ray-project+ray 3056.xml
ray-project+ray 3057.xml
ray-project+ray 3069.xml
ray-project+ray 3190.xml
ray-project+ray 3214.xml
ray-project+ray 3260.xml
ray-project+ray 3277.xml
ray-project+ray 3301.xml
ray-project+ray 3367.xml
ray-project+ray 3440.xml
ray-project+ray 3470.xml
ray-project+ray 3494.xml
ray-project+ray 3651.xml
ray-project+ray 3684.xml
ray-project+ray 3775.xml
ray-project+ray 3780.xml
ray-project+ray 3854.xml
ray-project+ray 4099.xml
ray-project+ray 4103.xml
r

smistad+fast 130.xml
smistad+fast 134.xml
smistad+fast 137.xml
[171/207] spotify+featran 8
spotify+featran 110.xml
spotify+featran 119.xml
spotify+featran 143.xml
spotify+featran 185.xml
spotify+featran 303.xml
spotify+featran 31.xml
spotify+featran 35.xml
spotify+featran 71.xml
[172/207] sql-machine-learning+elasticdl 16
sql-machine-learning+elasticdl 1239.xml
sql-machine-learning+elasticdl 1259.xml
sql-machine-learning+elasticdl 1302.xml
sql-machine-learning+elasticdl 1331.xml
sql-machine-learning+elasticdl 1374.xml
sql-machine-learning+elasticdl 1396.xml
sql-machine-learning+elasticdl 1494.xml
sql-machine-learning+elasticdl 1808.xml
sql-machine-learning+elasticdl 1812.xml
sql-machine-learning+elasticdl 1814.xml
sql-machine-learning+elasticdl 1826.xml
sql-machine-learning+elasticdl 1829.xml
sql-machine-learning+elasticdl 2262.xml
sql-machine-learning+elasticdl 2367.xml
sql-machine-learning+elasticdl 726.xml
sql-machine-learning+elasticdl 760.xml
[173/207] sql-machine-learning+sqlflow

tensorflow+tensorflow 12436.xml
tensorflow+tensorflow 12569.xml
tensorflow+tensorflow 12608.xml
tensorflow+tensorflow 12641.xml
tensorflow+tensorflow 12902.xml
tensorflow+tensorflow 13202.xml
tensorflow+tensorflow 13431.xml
tensorflow+tensorflow 13506.xml
tensorflow+tensorflow 13526.xml
tensorflow+tensorflow 13536.xml
tensorflow+tensorflow 13558.xml
tensorflow+tensorflow 13576.xml
tensorflow+tensorflow 13764.xml
tensorflow+tensorflow 13827.xml
tensorflow+tensorflow 13885.xml
tensorflow+tensorflow 14292.xml
tensorflow+tensorflow 14455.xml
tensorflow+tensorflow 14542.xml
tensorflow+tensorflow 14739.xml
tensorflow+tensorflow 14776.xml
tensorflow+tensorflow 14800.xml
tensorflow+tensorflow 14819.xml
tensorflow+tensorflow 14942.xml
tensorflow+tensorflow 14985.xml
tensorflow+tensorflow 15034.xml
tensorflow+tensorflow 15239.xml
tensorflow+tensorflow 15345.xml
tensorflow+tensorflow 15611.xml
tensorflow+tensorflow 15766.xml
tensorflow+tensorflow 15882.xml
tensorflow+tensorflow 15891.xml
tensorfl

tensorflow+tensorflow 7906.xml
tensorflow+tensorflow 8011.xml
tensorflow+tensorflow 8364.xml
tensorflow+tensorflow 8718.xml
tensorflow+tensorflow 8809.xml
tensorflow+tensorflow 9047.xml
tensorflow+tensorflow 9089.xml
tensorflow+tensorflow 9103.xml
tensorflow+tensorflow 9136.xml
tensorflow+tensorflow 9161.xml
tensorflow+tensorflow 9312.xml
tensorflow+tensorflow 9633.xml
tensorflow+tensorflow 9931.xml
[185/207] tensorflow+tfjs 55
tensorflow+tfjs 1060.xml
tensorflow+tfjs 1779.xml
tensorflow+tfjs 1898.xml
tensorflow+tfjs 1908.xml
tensorflow+tfjs 1912.xml
tensorflow+tfjs 2170.xml
tensorflow+tfjs 2177.xml
tensorflow+tfjs 2194.xml
tensorflow+tfjs 2205.xml
tensorflow+tfjs 2263.xml
tensorflow+tfjs 2317.xml
tensorflow+tfjs 2624.xml
tensorflow+tfjs 2632.xml
tensorflow+tfjs 2643.xml
tensorflow+tfjs 2660.xml
tensorflow+tfjs 2683.xml
tensorflow+tfjs 2698.xml
tensorflow+tfjs 2753.xml
tensorflow+tfjs 2998.xml
tensorflow+tfjs 3095.xml
tensorflow+tfjs 3236.xml
tensorflow+tfjs 3241.xml
tensorflow+tfjs 32

wandb+client 1319.xml
wandb+client 1321.xml
wandb+client 1466.xml
wandb+client 549.xml
wandb+client 618.xml
wandb+client 656.xml
wandb+client 734.xml
[202/207] williamfalcon+pytorch-lightning 54
williamfalcon+pytorch-lightning 1290.xml
williamfalcon+pytorch-lightning 138.xml
williamfalcon+pytorch-lightning 1388.xml
williamfalcon+pytorch-lightning 142.xml
williamfalcon+pytorch-lightning 1468.xml
williamfalcon+pytorch-lightning 1522.xml
williamfalcon+pytorch-lightning 154.xml
williamfalcon+pytorch-lightning 157.xml
williamfalcon+pytorch-lightning 1570.xml
williamfalcon+pytorch-lightning 1815.xml
williamfalcon+pytorch-lightning 2058.xml
williamfalcon+pytorch-lightning 2143.xml
williamfalcon+pytorch-lightning 224.xml
williamfalcon+pytorch-lightning 2333.xml
williamfalcon+pytorch-lightning 2334.xml
williamfalcon+pytorch-lightning 2386.xml
williamfalcon+pytorch-lightning 2479.xml
williamfalcon+pytorch-lightning 249.xml
williamfalcon+pytorch-lightning 2551.xml
williamfalcon+pytorch-lightning 

In [12]:
### simple version ######
# Export every bug report as one JSON file ("simple" variant: same layout as
# the complex export but without any complexity metrics).
# Layout: {'BR': {..., 'comments': {...}}, 'commit': {..., 'changed_files': {...}}}
SIMPLE_JSON_DIR = './JSonSet/simplejson'

for project_idx, GroupProject in enumerate(gps):
    BRs = get_br(GroupProject)
    print(f'[{project_idx}/{len(gps)}]', GroupProject, len(BRs))
    for BR in BRs:
        xmldir = f"{fpath}/{GroupProject}/{BR}"
        bug, commit = openxml(xmldir)

        # --- bug report (BR) ---
        summary, description = getbugtext(bug)
        bid, author, openT, closeT = getbuginfo(bug)

        # Comments are keyed by their position in the XML, not by comment id.
        cdict = {}
        for i, C in enumerate(bug.find('comments')):
            Cid, Cauthor, Cdate = getCinfo(C)
            cdict[f'comments_{i}'] = {'comment_id': Cid,
                                      'comment_author': Cauthor,
                                      'commentT': Cdate,
                                      'comment_text': getCtext(C)}

        # --- commit --- (the complexity values are not exported here)
        commitID, committer, commitDate, *_unused_metrics = get_commitinfo(commit)

        # per changed file
        fdict = {}
        for i, file in enumerate(get_files(commit)):
            type_, Ofname, Nfname, *_unused_metrics, Nmm = get_fileinfo(file)
            file_entry = {'file_change_type': type_,
                          'file_Nmethod': Nmm,
                          'file_old_name': Ofname,
                          'file_new_name': Nfname}
            fdict[f'file_{i}'] = file_entry

            # Added, deleted and renamed files are exported without hunks.
            if type_ in ('ADD', 'DELETE', 'RENAME'):
                continue

            # Remaining change types: one hunk per modified method, or a
            # single file-level hunk when no method was touched.
            hunks = {}
            if Nmm == 0:
                modified_lines = file.find('modified_lines')
                hunks['hunk_0'] = {'Ismethod': 0,
                                   'added_lines': modified_lines.find('added_lines').text,
                                   'deleted_lines': modified_lines.find('deleted_lines').text}
            else:
                for k, method in enumerate(file.findall('method')):
                    mName, mParams, aline, dline = get_methodinfo(method)
                    # Only the line span is kept; the metrics are dropped.
                    *_unused_metrics, sline, eline = get_methodattribs(method)
                    hunks[f'hunk_{k}'] = {'Ismethod': 1,
                                          'added_lines': aline,
                                          'deleted_lines': dline,
                                          'method_info': {'method_name': mName,
                                                          'method_params': mParams,
                                                          'method_startline': sline,
                                                          'method_endline': eline}}
            file_entry['hunks'] = hunks

        finaljson = {'BR': {'BR_id': bid,
                            'BR_author': author,
                            'BRopenT': openT,
                            'BRcloseT': closeT,
                            'BR_text': {'BRsummary': summary,
                                        'BRdescription': description},
                            'comments': cdict},
                     'commit': {'commit_id': commitID,
                                'commit_author': committer,
                                'commitT': commitDate,
                                'changed_files': fdict}}

        # One output directory per project; exist_ok avoids the separate
        # isdir() check.
        out_dir = os.path.join(SIMPLE_JSON_DIR, GroupProject)
        os.makedirs(out_dir, exist_ok=True)
        jsonpath = os.path.join(out_dir, BR.replace('.xml', '.json'))
        with open(jsonpath, 'w') as jf:
            json.dump(finaljson, jf)

[0/207] albu+albumentations 2
[1/207] albumentations-team+albumentations 6
[2/207] alexemg+deeplabcut 1
[3/207] alibaba+pipcook 10
[4/207] allegroai+clearml 4
[5/207] allenai+allennlp 31
[6/207] amaiya+ktrain 1
[7/207] apache+incubator-mxnet 252
[8/207] apache+tvm 7
[9/207] apple+coremltools 12
[10/207] apple+turicreate 195
[11/207] arraiyopensource+kornia 5
[12/207] asteroid-team+asteroid 8
[13/207] asyml+texar-pytorch 14
[14/207] avinashpaliwal+super-slomo 5
[15/207] aws+sagemaker-python-sdk 28
[16/207] awslabs+autogluon 24
[17/207] awslabs+djl 6
[18/207] awslabs+gluon-ts 41
[19/207] awslabs+multi-model-server 7
[20/207] awslabs+mxnet-model-server 4
[21/207] awslabs+sockeye 5
[22/207] azure+mmlspark 8
[23/207] bentoml+bentoml 49
[24/207] bhrnjica+anndotnet 2
[25/207] bindsnet+bindsnet 1
[26/207] blue-oil+blueoil 16
[27/207] breta01+handwriting-ocr 7
[28/207] bvlc+caffe 1
[29/207] bytedance+byteps 7
[30/207] catalyst-team+catalyst 22
[31/207] cdqa-suite+cdqa 2
[32/207] cgre-aachen+gem